From caedc9a5cab22dfeb32778c5447c9cc5bdc66b55 Mon Sep 17 00:00:00 2001 From: toravest Date: Tue, 8 Apr 2025 13:35:28 +0200 Subject: [PATCH] add universal function, add axhline 0-punkt --- notebooks/notebook_one_day_data.ipynb | 66 +++++++--------------- notebooks/notebook_one_week_data.ipynb | 75 +++++++++---------------- notebooks/notebook_statistic_data.ipynb | 10 ++++ 3 files changed, 57 insertions(+), 94 deletions(-) diff --git a/notebooks/notebook_one_day_data.ipynb b/notebooks/notebook_one_day_data.ipynb index 2d7199e..5f861d0 100644 --- a/notebooks/notebook_one_day_data.ipynb +++ b/notebooks/notebook_one_day_data.ipynb @@ -164,26 +164,10 @@ "# Reads from file using pandas\n", "weather_data = pd.read_json(f'../data/output_stedsnavn/data_{filename}.json')\n", "\n", - "# Checks if 'list' in weather, then proceed because it is the right data\n", - "if 'list' in weather_data:\n", - " # Normalize the json for better readability\n", - " df = pd.json_normalize(weather_data['list'])\n", + "from my_package.util import extract_city_df\n", "\n", - " # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n", - " df = df.drop_duplicates(subset=['dt'])\n", - "\n", - " # The weather column dosnt have any releated information, therefor we delete it\n", - " df = df.drop(columns=\"weather\")\n", - "\n", - " # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n", - " df['dt'] = pd.to_datetime(df['dt'], unit='s')\n", - " df.set_index('dt', inplace=True)\n", - "\n", - " # Ensure the DataFrame is displayed correctly \n", - " display(df)\n", - " \n", - "else:\n", - " print(\"The 'list' key is not present in the JSON file.\")" + "df = extract_city_df(weather_data)\n", + "display(df)" ] }, { @@ -249,6 +233,9 @@ "# Add title for the plot\n", "plt.title(f'Temperatur {city_name}, ({date})')\n", "\n", + "# Add marker at 0 temperature\n", + "plt.axhline(y=0, color='black', linewidth=1.5)\n", + 
"\n", "# Show grid\n", "plt.grid()\n", "\n", @@ -277,6 +264,9 @@ "import matplotlib.dates as mdates\n", "import numpy as np\n", "\n", + "from my_package.util import ensure_rain_column\n", + "from my_package.util import ensure_snow_column\n", + "\n", "x_axis = df.index\n", "\n", "# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n", @@ -285,8 +275,7 @@ "\n", "# If no rain, make the rain column and fill it with NaN\n", "except KeyError:\n", - " print(\"'Rain' is not present in the JSON file.\")\n", - " df['rain.1h'] = np.nan\n", + " df = ensure_rain_column(df)\n", "\n", "# Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n", "try:\n", @@ -294,8 +283,7 @@ "\n", "# If no snow, make the snow column and fill it with NaN\n", "except KeyError:\n", - " print(\"'Snow' is not present in the JSON file.\")\n", - " df['snow.1h'] = np.nan\n", + " df = ensure_snow_column(df)\n", "\n", "# Choose the width and height of the plot\n", "plt.figure(figsize=(15, 6))\n", @@ -387,17 +375,12 @@ "metadata": {}, "outputs": [], "source": [ - "# If rain is stored, fill the NaN with 0\n", - "try: \n", - " df['rain.1h'] = df['rain.1h'].fillna(0)\n", - "except KeyError:\n", - " print([\"'rain.1h', not in df\"])\n", + "from my_package.util import fill_rain_column\n", + "from my_package.util import fill_snow_column\n", "\n", - "# If snow is stored, fill the NaN with 0\n", - "try: \n", - " df['snow.1h'] = df['snow.1h'].fillna(0)\n", - "except KeyError:\n", - " print(\"['snow.1h'], not in df\")\n", + "df = fill_rain_column(df)\n", + "\n", + "df = fill_snow_column(df)\n", "\n", "# Drops all the columns, if it has 'NaN' value.\n", "df = df.dropna(axis='columns', how='all')\n", @@ -518,6 +501,9 @@ "ax1.axhline(y=temp_mean, color='tab:red', linestyle='dashed', label='Mean temperature (°C)')\n", "ax1.tick_params(axis='y', labelcolor='tab:red')\n", "\n", + "# Add marker at 0 temperature\n", + "ax1.axhline(y=0, color='black', 
linewidth=1.5)\n", + "\n", "# Plot Precipitation as bars on the secondary y-axis\n", "ax2 = ax1.twinx()\n", "\n", @@ -571,13 +557,6 @@ "plt.show()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -688,13 +667,6 @@ "# Display the predicted temperatures\n", "print(f'predicted temperatures: {predicted_temperatures}')\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/notebook_one_week_data.ipynb b/notebooks/notebook_one_week_data.ipynb index dcaef29..a714766 100644 --- a/notebooks/notebook_one_week_data.ipynb +++ b/notebooks/notebook_one_week_data.ipynb @@ -160,7 +160,9 @@ "### Rensking av riktig data\n", "Vi går inn i 'list' for å finne den relevante informasjonen, og ikke bare meta-informasjon.\n", "\n", - "Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid." + "Sørger for å fjerne duplikater, og andre irrelevante kolonner. Samt setter indekskolonnen til tid.\n", + "\n", + "Sjekker om kolonnene ['rain.1h'] eller ['snow.1h'] er til stede, hvis ikke blir de lagd og fylt med NaN. Vi trenger disse kolonnene, selv om de er tomme, for senere å plotte grafen og unngå feilmeldinger. Dette fordi vi senere sjekker manglende verdier som NaN. 
" ] }, { @@ -171,43 +173,29 @@ "source": [ "import numpy as np\n", "\n", - "# Goes into the 'list' to get the needed and relevant information\n", - "if 'list' in data:\n", - " # Normalize the json, for better readability\n", - " df = pd.json_normalize(data['list'])\n", - "\n", - " # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n", - " df = df.drop_duplicates(subset=['dt'])\n", + "from my_package.util import extract_city_df\n", + "from my_package.util import ensure_rain_column\n", + "from my_package.util import ensure_snow_column\n", "\n", - " # The weather column does not have any releated information, therefor we delete it\n", - " df = df.drop(columns=\"weather\")\n", + "df = extract_city_df(data)\n", "\n", - " # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n", - " df['dt'] = pd.to_datetime(df['dt'], unit='s')\n", - " df.set_index('dt', inplace=True)\n", + "# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n", + "try:\n", + " rain = df['rain.1h']\n", "\n", - " # Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n", - " try:\n", - " rain = df['rain.1h']\n", - "\n", - " # If no rain, make the rain column and fill it with NaN\n", - " except KeyError:\n", - " print(\"'Rain' is not present in the JSON file.\")\n", - " df['rain.1h'] = np.nan\n", + "# If no rain, make the rain column and fill it with NaN\n", + "except KeyError:\n", + " df = ensure_rain_column(df)\n", "\n", - " # Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n", - " try:\n", - " snow = df['snow.1h']\n", + "# Checks if the snow is a value, it will not be if it is no snow and then cause a KeyError\n", + "try:\n", + " snow = df['snow.1h']\n", "\n", - " # If no snow, make the snow column and fill it with NaN\n", - " except KeyError:\n", - " print(\"'Snow' is not present in the 
JSON file.\")\n", - " df['snow.1h'] = np.nan\n", + "# If no snow, make the snow column and fill it with NaN\n", + "except KeyError:\n", + " df = ensure_snow_column(df)\n", "\n", - " # Display the datafram, with the changes\n", - " display(df)\n", - "else:\n", - " print(\"The 'list' key is not present in the JSON file.\")" + "display(df) " ] }, { @@ -274,17 +262,11 @@ "metadata": {}, "outputs": [], "source": [ - "# If rain is stored, fill the NaN with 0\n", - "try: \n", - " df['rain.1h'] = df['rain.1h'].fillna(0)\n", - "except KeyError:\n", - " print([\"'rain.1h', not in df\"])\n", + "from my_package.util import fill_rain_column\n", + "from my_package.util import fill_snow_column\n", "\n", - "# If snow is stored, fill the NaN with 0\n", - "try: \n", - " df['snow.1h'] = df['snow.1h'].fillna(0)\n", - "except KeyError:\n", - " print(\"['snow.1h'], not in df\")\n", + "df = fill_rain_column(df)\n", + "df = fill_snow_column(df)\n", "\n", "# If wind_gust is stored, fill the NaN with 0\n", "try: \n", @@ -407,13 +389,13 @@ "# Plot Precipitation as bars on the secondary y-axis\n", "ax2 = ax1.twinx()\n", "\n", + "bin_count = len(x_axis)\n", + "\n", "# Add rain\n", - "# ax2.bar(x_axis, rain, color='tab:blue', alpha=0.5, width=0.02, label='Rain (mm)')\n", - "ax2.hist(x_axis, bins=len(x_axis), weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n", + "ax2.hist(x_axis, bins=bin_count, weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n", "\n", "# Add snow\n", - "# ax2.bar(x_axis, snow, color='tab:grey', alpha=0.5, width=0.02, label='Snow (mm)')\n", - "ax2.hist(x_axis, bins=len(x_axis), weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n", + "ax2.hist(x_axis, bins=bin_count, weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n", "\n", "# Design the y-axis for precipiation\n", "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n", @@ -596,7 +578,6 @@ "plot_path = os.path.join(output_folder, 
f\"weather_data_plot{city_name}.png\")\n", "plt.savefig(plot_path) # Save the plot as a PNG file\n", "\n", - "\n", "# Show the plot\n", "plt.show()" ] diff --git a/notebooks/notebook_statistic_data.ipynb b/notebooks/notebook_statistic_data.ipynb index 996d9cc..cdd8ca1 100644 --- a/notebooks/notebook_statistic_data.ipynb +++ b/notebooks/notebook_statistic_data.ipynb @@ -196,6 +196,9 @@ "plt.xlabel(\"Date\")\n", "plt.ylabel(\"Temperature (°C)\")\n", "\n", + "# Add marker at 0 temperature\n", + "plt.axhline(y=0, color='black', linewidth=1.5)\n", + "\n", "# Customize the x-axis to show ticks and labels only at the start of each month\n", "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", @@ -262,6 +265,9 @@ "ax1.set_ylabel('Temperature (°C)', color='tab:red')\n", "ax1.tick_params(axis='y', labelcolor='tab:red')\n", "\n", + "# Add marker at 0 temperature\n", + "ax1.axhline(y=0, color='black', linewidth=1.5)\n", + "\n", "# Plot precipitation as bars on the secondary y-axis\n", "ax2 = ax1.twinx()\n", "ax2.bar(x_axis, precipitation, color='tab:blue', alpha=0.5, width=1, label='Precipitation (mm)')\n", @@ -349,6 +355,8 @@ "plt.plot(x_axis, temp_record_max, color='tab:red', label = 'Max temperatur')\n", "plt.plot(x_axis, temp_record_min, color='tab:blue', label = 'Min temperatur')\n", "\n", + "# Add marker at 0 temperature\n", + "plt.axhline(y=0, color='black', linewidth=1.5)\n", "\n", "# Customize the x-axis to show ticks and labels only at the start of each month\n", "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", @@ -482,6 +490,8 @@ "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) # Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", "\n", + "plt.axhline(y=0, color='black', linewidth=1.5)\n", + "\n", "# Add labels, title, and legend\n", "plt.xlabel('Month-Day')\n", 
"plt.ylabel('Temperature (°C)')\n",