From caedc9a5cab22dfeb32778c5447c9cc5bdc66b55 Mon Sep 17 00:00:00 2001
From: toravest <torave@stud.ntnu.no>
Date: Tue, 8 Apr 2025 13:35:28 +0200
Subject: [PATCH] add universal function, add axhline 0-punkt

---
 notebooks/notebook_one_day_data.ipynb   | 66 +++++++---------------
 notebooks/notebook_one_week_data.ipynb  | 75 +++++++++----------------
 notebooks/notebook_statistic_data.ipynb | 10 ++++
 3 files changed, 57 insertions(+), 94 deletions(-)

diff --git a/notebooks/notebook_one_day_data.ipynb b/notebooks/notebook_one_day_data.ipynb
index 2d7199e..5f861d0 100644
--- a/notebooks/notebook_one_day_data.ipynb
+++ b/notebooks/notebook_one_day_data.ipynb
@@ -164,26 +164,10 @@
     "# Reads from file using pandas\n",
     "weather_data = pd.read_json(f'../data/output_stedsnavn/data_{filename}.json')\n",
     "\n",
-    "# Checks if 'list' in weather, then proceed because it is the right data\n",
-    "if 'list' in weather_data:\n",
-    "    # Normalize the json for better readability\n",
-    "    df = pd.json_normalize(weather_data['list'])\n",
+    "from my_package.util import extract_city_df\n",
     "\n",
-    "    # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
-    "    df = df.drop_duplicates(subset=['dt'])\n",
-    "\n",
-    "    # The weather column dosnt have any releated information, therefor we delete it\n",
-    "    df = df.drop(columns=\"weather\")\n",
-    "\n",
-    "    # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
-    "    df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
-    "    df.set_index('dt', inplace=True)\n",
-    "\n",
-    "    # Ensure the DataFrame is displayed correctly   \n",
-    "    display(df)\n",
-    "    \n",
-    "else:\n",
-    "    print(\"The 'list' key is not present in the JSON file.\")"
+    "df = extract_city_df(weather_data)\n",
+    "display(df)"
    ]
   },
   {
@@ -249,6 +233,9 @@
     "# Add title for the plot\n",
     "plt.title(f'Temperatur {city_name}, ({date})')\n",
     "\n",
+    "# Draw a horizontal reference line at temperature 0\n",
+    "plt.axhline(y=0, color='black', linewidth=1.5)\n",
+    "\n",
     "# Show grid\n",
     "plt.grid()\n",
     "\n",
@@ -277,6 +264,9 @@
     "import matplotlib.dates as mdates\n",
     "import numpy as np\n",
     "\n",
+    "from my_package.util import ensure_rain_column\n",
+    "from my_package.util import ensure_snow_column\n",
+    "\n",
     "x_axis = df.index\n",
     "\n",
     "# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
@@ -285,8 +275,7 @@
     "\n",
     "# If no rain, make the rain column and fill it with NaN\n",
     "except KeyError:\n",
-    "    print(\"'Rain' is not present in the JSON file.\")\n",
-    "    df['rain.1h'] = np.nan\n",
+    "    df = ensure_rain_column(df)\n",
     "\n",
     "# Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
     "try:\n",
@@ -294,8 +283,7 @@
     "\n",
     "# If no snow, make the snow column and fill it with NaN\n",
     "except KeyError:\n",
-    "    print(\"'Snow' is not present in the JSON file.\")\n",
-    "    df['snow.1h'] = np.nan\n",
+    "    df = ensure_snow_column(df)\n",
     "\n",
     "# Choose the width and height of the plot\n",
     "plt.figure(figsize=(15, 6))\n",
@@ -387,17 +375,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# If rain is stored, fill the NaN with 0\n",
-    "try: \n",
-    "    df['rain.1h'] = df['rain.1h'].fillna(0)\n",
-    "except KeyError:\n",
-    "    print([\"'rain.1h', not in df\"])\n",
+    "from my_package.util import fill_rain_column\n",
+    "from my_package.util import fill_snow_column\n",
     "\n",
-    "# If snow is stored, fill the NaN with 0\n",
-    "try: \n",
-    "    df['snow.1h'] = df['snow.1h'].fillna(0)\n",
-    "except KeyError:\n",
-    "    print(\"['snow.1h'], not in df\")\n",
+    "df = fill_rain_column(df)\n",
+    "\n",
+    "df = fill_snow_column(df)\n",
     "\n",
     "# Drops all the columns, if it has 'NaN' value.\n",
     "df = df.dropna(axis='columns', how='all')\n",
@@ -518,6 +501,9 @@
     "ax1.axhline(y=temp_mean, color='tab:red', linestyle='dashed', label='Mean temperature (°C)')\n",
     "ax1.tick_params(axis='y', labelcolor='tab:red')\n",
     "\n",
+    "# Draw a horizontal reference line at 0 °C on the temperature axis\n",
+    "ax1.axhline(y=0, color='black', linewidth=1.5)\n",
+    "\n",
     "# Plot Precipitation as bars on the secondary y-axis\n",
     "ax2 = ax1.twinx()\n",
     "\n",
@@ -571,13 +557,6 @@
     "plt.show()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -688,13 +667,6 @@
     "# Display the predicted temperatures\n",
     "print(f'predicted temperatures: {predicted_temperatures}')\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/notebooks/notebook_one_week_data.ipynb b/notebooks/notebook_one_week_data.ipynb
index dcaef29..a714766 100644
--- a/notebooks/notebook_one_week_data.ipynb
+++ b/notebooks/notebook_one_week_data.ipynb
@@ -160,7 +160,9 @@
     "### Rensking av riktig data\n",
     "Vi går inn i 'list' for å finne den relevante informasjonen, og ikke bare meta-informasjon.\n",
     "\n",
-    "Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid."
+    "Sørger for å fjerne duplikater og andre irrelevante kolonner, samt setter indekskolonnen til tid.\n",
+    "\n",
+    "Sjekker om kolonnene ['rain.1h'] og ['snow.1h'] er til stede; hvis ikke, blir de lagd og fylt med NaN. Vi trenger disse kolonnene, selv om de er tomme, for senere å kunne plotte grafen og unngå feilmeldinger. Dette er fordi vi senere sjekker manglende verdier som NaN."
    ]
   },
   {
@@ -171,43 +173,29 @@
    "source": [
     "import numpy as np\n",
     "\n",
-    "# Goes into the 'list' to get the needed and relevant information\n",
-    "if 'list' in data:\n",
-    "    # Normalize the json, for better readability\n",
-    "    df = pd.json_normalize(data['list'])\n",
-    "\n",
-    "    # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
-    "    df = df.drop_duplicates(subset=['dt'])\n",
+    "from my_package.util import extract_city_df\n",
+    "from my_package.util import ensure_rain_column\n",
+    "from my_package.util import ensure_snow_column\n",
     "\n",
-    "    # The weather column does not have any releated information, therefor we delete it\n",
-    "    df = df.drop(columns=\"weather\")\n",
+    "df = extract_city_df(data)\n",
     "\n",
-    "    # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
-    "    df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
-    "    df.set_index('dt', inplace=True)\n",
+    "# Check whether the 'rain.1h' column exists; it is missing when there was no rain, and reading it then raises a KeyError\n",
+    "try:\n",
+    "    rain = df['rain.1h']\n",
     "\n",
-    "        # Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
-    "    try:\n",
-    "        rain = df['rain.1h']\n",
-    "\n",
-    "    # If no rain, make the rain column and fill it with NaN\n",
-    "    except KeyError:\n",
-    "        print(\"'Rain' is not present in the JSON file.\")\n",
-    "        df['rain.1h'] = np.nan\n",
+    "# If no rain, make the rain column and fill it with NaN\n",
+    "except KeyError:\n",
+    "    df = ensure_rain_column(df)\n",
     "\n",
-    "    # Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
-    "    try:\n",
-    "        snow = df['snow.1h']\n",
+    "# Check whether the 'snow.1h' column exists; it is missing when there was no snow, and reading it then raises a KeyError\n",
+    "try:\n",
+    "    snow = df['snow.1h']\n",
     "\n",
-    "    # If no snow, make the snow column and fill it with NaN\n",
-    "    except KeyError:\n",
-    "        print(\"'Snow' is not present in the JSON file.\")\n",
-    "        df['snow.1h'] = np.nan\n",
+    "# If no snow, make the snow column and fill it with NaN\n",
+    "except KeyError:\n",
+    "    df = ensure_snow_column(df)\n",
     "\n",
-    "    # Display the datafram, with the changes\n",
-    "    display(df)\n",
-    "else:\n",
-    "    print(\"The 'list' key is not present in the JSON file.\")"
+    "display(df)  "
    ]
   },
   {
@@ -274,17 +262,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# If rain is stored, fill the NaN with 0\n",
-    "try: \n",
-    "    df['rain.1h'] = df['rain.1h'].fillna(0)\n",
-    "except KeyError:\n",
-    "    print([\"'rain.1h', not in df\"])\n",
+    "from my_package.util import fill_rain_column\n",
+    "from my_package.util import fill_snow_column\n",
     "\n",
-    "# If snow is stored, fill the NaN with 0\n",
-    "try: \n",
-    "    df['snow.1h'] = df['snow.1h'].fillna(0)\n",
-    "except KeyError:\n",
-    "    print(\"['snow.1h'], not in df\")\n",
+    "df = fill_rain_column(df)\n",
+    "df = fill_snow_column(df)\n",
     "\n",
     "# If wind_gust is stored, fill the NaN with 0\n",
     "try: \n",
@@ -407,13 +389,13 @@
     "# Plot Precipitation as bars on the secondary y-axis\n",
     "ax2 = ax1.twinx()\n",
     "\n",
+    "bin_count = len(x_axis)\n",
+    "\n",
     "# Add rain\n",
-    "# ax2.bar(x_axis, rain, color='tab:blue', alpha=0.5, width=0.02, label='Rain (mm)')\n",
-    "ax2.hist(x_axis, bins=len(x_axis), weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
+    "ax2.hist(x_axis, bins=bin_count, weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
     "\n",
     "# Add snow\n",
-    "# ax2.bar(x_axis, snow, color='tab:grey', alpha=0.5, width=0.02, label='Snow (mm)')\n",
-    "ax2.hist(x_axis, bins=len(x_axis), weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
+    "ax2.hist(x_axis, bins=bin_count, weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
     "\n",
     "# Design the y-axis for precipiation\n",
     "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n",
@@ -596,7 +578,6 @@
     "plot_path = os.path.join(output_folder, f\"weather_data_plot{city_name}.png\")\n",
     "plt.savefig(plot_path)  # Save the plot as a PNG file\n",
     "\n",
-    "\n",
     "# Show the plot\n",
     "plt.show()"
    ]
diff --git a/notebooks/notebook_statistic_data.ipynb b/notebooks/notebook_statistic_data.ipynb
index 996d9cc..cdd8ca1 100644
--- a/notebooks/notebook_statistic_data.ipynb
+++ b/notebooks/notebook_statistic_data.ipynb
@@ -196,6 +196,9 @@
     "plt.xlabel(\"Date\")\n",
     "plt.ylabel(\"Temperature (°C)\")\n",
     "\n",
+    "# Draw a horizontal reference line at 0 °C\n",
+    "plt.axhline(y=0, color='black', linewidth=1.5)\n",
+    "\n",
     "# Customize the x-axis to show ticks and labels only at the start of each month\n",
     "plt.gca().xaxis.set_major_locator(mdates.MonthLocator())  \n",
     "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
@@ -262,6 +265,9 @@
     "ax1.set_ylabel('Temperature (°C)', color='tab:red')\n",
     "ax1.tick_params(axis='y', labelcolor='tab:red')\n",
     "\n",
+    "# Draw a horizontal reference line at 0 °C on the temperature axis\n",
+    "ax1.axhline(y=0, color='black', linewidth=1.5)\n",
+    "\n",
     "# Plot precipitation as bars on the secondary y-axis\n",
     "ax2 = ax1.twinx()\n",
     "ax2.bar(x_axis, precipitation, color='tab:blue', alpha=0.5, width=1, label='Precipitation (mm)')\n",
@@ -349,6 +355,8 @@
     "plt.plot(x_axis, temp_record_max, color='tab:red', label = 'Max temperatur')\n",
     "plt.plot(x_axis, temp_record_min, color='tab:blue', label = 'Min temperatur')\n",
     "\n",
+    "# Draw a horizontal reference line at temperature 0\n",
+    "plt.axhline(y=0, color='black', linewidth=1.5)\n",
     "\n",
     "# Customize the x-axis to show ticks and labels only at the start of each month\n",
     "plt.gca().xaxis.set_major_locator(mdates.MonthLocator())  \n",
@@ -482,6 +490,8 @@
     "plt.gca().xaxis.set_major_locator(mdates.MonthLocator())  \n",
     "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))  # Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
     "\n",
+    "plt.axhline(y=0, color='black', linewidth=1.5)\n",
+    "\n",
     "# Add labels, title, and legend\n",
     "plt.xlabel('Month-Day')\n",
     "plt.ylabel('Temperature (°C)')\n",