diff --git a/notebooks/notebook_compare_statistic_data.ipynb b/notebooks/notebook_compare_statistic_data.ipynb index d91d393..3a94c06 100644 --- a/notebooks/notebook_compare_statistic_data.ipynb +++ b/notebooks/notebook_compare_statistic_data.ipynb @@ -145,47 +145,30 @@ "source": [ "import pandas as pd\n", "import json\n", + "from pandasql import sqldf\n", "\n", "file_path = f'../data/output_sammenligning_statistikk/data_{filename}.json'\n", "\n", + "from my_package.util import extract_city_data_stat\n", "\n", "# Load the whole JSON file\n", "with open(file_path, 'r') as f:\n", " all_city_data = json.load(f)\n", "\n", - "def extract_city_data(data):\n", - " # Checks if the 'result' column is in the data\n", - " if 'result' in data:\n", - " # Normalize the json and store it as a dataframe for better readability\n", - " df = pd.json_normalize(data['result'])\n", + "# Separate variables for each city\n", + "city_1_df = extract_city_data_stat(all_city_data.get('city_1'))\n", + "city_1_df['city'] = 'city_1'\n", + "city_1_df['city_name'] = city_1\n", "\n", - " # Display the dataframe\n", - " return df\n", - " else:\n", - " print(\"'result' not in data\")\n", - " return None\n", + "city_2_df = extract_city_data_stat(all_city_data.get('city_2'))\n", + "city_2_df['city'] = 'city_2'\n", + "city_2_df['city_name'] = city_2\n", "\n", - "# Separate variables for each city\n", - "city_1_df = extract_city_data(all_city_data.get('city_1'))\n", - "city_2_df = extract_city_data(all_city_data.get('city_2'))\n", - "\n", - "# Checks if the data is not empty, aka there are values\n", - "if city_1_df is not None:\n", - " # Prints the city name\n", - " print(f\"{city_1} data:\")\n", - " # Display the dataframe readable\n", - " display(city_1_df)\n", - "else:\n", - " print('\"city_1_df\" is empty')\n", - "\n", - "# Checks if the data is not empty, aka there are values\n", - "if city_2_df is not None:\n", - " # Prints the city name\n", - " print(f\"{city_2} data:\")\n", - " # Display the 
dataframe readable\n", - " display(city_2_df)\n", - "else:\n", - " print('\"city_2_df\" is empty')\n" + "# Concat both city df into one, to be able to use pandas sql\n", + "both_cities_df = pd.concat([city_1_df, city_2_df])\n", + "\n", + "# Display all the values from both cities, with city 1 first, then city 2\n", + "display(sqldf('''SELECT * FROM both_cities_df'''))\n" ] }, { @@ -206,36 +189,29 @@ "metadata": {}, "outputs": [], "source": [ - "def clean_df(df):\n", - " # Drop all columns that end with '...' using the filter function\n", - " df = df.drop(columns=df.filter(like='.p25').columns)\n", - " df = df.drop(columns=df.filter(like='.p75').columns)\n", - " df = df.drop(columns=df.filter(like='.st_dev').columns)\n", - " df = df.drop(columns=df.filter(like='.num').columns)\n", - "\n", - " return df\n", + "from my_package.util import clean_df\n", "\n", "# Cleans data for unessecarily columns\n", - "city_1_df = clean_df(city_1_df)\n", - "city_2_df = clean_df(city_2_df)\n", + "both_cities_df = clean_df(both_cities_df)\n", "\n", - "display(city_1_df)\n", - "display(city_2_df)" + "display(both_cities_df)" ] }, { "cell_type": "markdown", - "id": "97847344", + "id": "4d493df2", "metadata": {}, "source": [ - "### Plotter temperatur\n", - "Denne koden plotter og sammenlginer data basert på gjennomsnitts temperatur gjennom året. For å sikre lagring av de ulike kjøringene, vil grafen bli lagret i mappen \"../data/output_fig_sammenligning/mean_temp_plot_{city_1}_(city_2).json\"\n" + "### Viser temperaturen\n", + "Vi bruker pandas SQL for å hente ut temperaturen for begge stedene, og lagrer de i en tabell. Ved hjelp av 'pivot', en innebygd funksjon for å rotere tabeller, setter vi den opp for bedre lesbarhet, ved å sette begge byene ved siden av hverandre og en sammenslåing av kolonnene måned og dag ('month_day') som index.
Dette gjør det lettere å sammenligne temperaturen for begge stedene til samme tid.\n", + "\n", + "Ved hjelp av en pandas SQL setning kan vi hente og lagre gjennomsnitt, maksimalt og minste målte temperatur for begge stedene. Senere kan vi bare skrive en SELECT setning til denne variabelen, for å eksempelvis hente ut gjennomsnitts data for sted 1." ] }, { "cell_type": "code", "execution_count": null, - "id": "851e62c8", + "id": "33d5a1c3", "metadata": {}, "outputs": [], "source": [ @@ -253,58 +229,36 @@ "output_folder = \"../data/output_fig\"\n", "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", "\n", - "# Converts to and make a new column with celsius temp, and not kelvin\n", - "city_1_df['temp.mean_celsius'] = kelvin_to_celsius(city_1_df['temp.mean'])\n", - "city_2_df['temp.mean_celsius'] = kelvin_to_celsius(city_2_df['temp.mean'])\n", - "\n", - "temp_city_1 = city_1_df['temp.mean_celsius']\n", - "temp_city_2 = city_2_df['temp.mean_celsius']\n", - "\n", - "\n", - "temp_mean_city_1 = temp_city_1.mean().round(2)\n", - "temp_mean_city_2 = temp_city_2.mean().round(2)\n", - "\n", - "# Convert from day and month, to datetime\n", - "# df['date'] = pd.to_datetime(df[['month', 'day']].assign(year=2024))\n", + "both_cities_df['temp.mean_celsius'] = kelvin_to_celsius(both_cities_df['temp.mean'])\n", + "both_cities_df['temp.max_celsius'] = kelvin_to_celsius(both_cities_df['temp.record_max'])\n", + "both_cities_df['temp.min_celsius'] = kelvin_to_celsius(both_cities_df['temp.record_min'])\n", "\n", "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", - "city_1_df['month_day'] = city_1_df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", - "city_2_df['month_day'] = city_2_df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", - "\n", - "# Plot the graph of the mean temperature\n", - "plt.figure(figsize=(12, 6))\n", -
"plt.plot(city_1_df['month_day'], temp_city_1, color='#2E8B57', label=f'temp {city_1}')\n", - "plt.plot(city_2_df['month_day'], temp_city_2, color='#FFD700', label=f'temp {city_2}')\n", - "\n", - "plt.axhline(temp_mean_city_1, color='#2E8B57', linestyle='dashed', alpha=0.7, label=f'Mean Temperature {city_1}')\n", - "plt.axhline(temp_mean_city_2, color='#FFD700', linestyle='dashed', alpha=0.7, label=f'Mean Temperature {city_2}')\n", - "\n", - "# Label for easier reading and understanding of the plot\n", - "# plt.title(f\"Mean temp - statistic historical {city_name}\")\n", - "plt.xlabel(\"Date\")\n", - "plt.ylabel(\"Temperature (°C)\")\n", - "\n", - "# Add marker at 0 temperature\n", - "plt.axhline(y=0, color='black', linewidth=1.5)\n", - "\n", - "# Customize the x-axis to show ticks and labels only at the start of each month\n", - "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", - "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", - "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", - "\n", - "plt.xticks(rotation=45)\n", - "plt.yticks(range(-20, 30, 2))\n", - "plt.tight_layout()\n", - "plt.grid()\n", - "\n", - "plt.legend()\n", - "\n", - "# Save the plot to the data/output_fig folder\n", - "# plot_path = os.path.join(output_folder, f\"mean_temp_plot_{city_name}.png\")\n", - "# plt.savefig(plot_path) # Save the plot as a PNG file\n", - "\n", - "# Show the plot\n", - "plt.show()\n" + "both_cities_df['month_day'] = both_cities_df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "\n", + "temp_data = sqldf('''\n", + " SELECT month_day, city_name, `temp.mean_celsius` as temp\n", + " FROM both_cities_df\n", + "''')\n", + "\n", + "# Use month_day as index, one column per city name, and the mean temperature (Celsius) as values\n", + "pivoted_temp = temp_data.pivot(index='month_day', columns='city_name', values='temp')\n", + "\n", + "# Rename the columns in the pivoted_temp\n", +
"pivoted_temp.columns = [f\"{col}_main_temp\" for col in pivoted_temp.columns]\n", + "# Display the final result\n", + "print(f\"Main temperature for {city_1} and {city_2}\")\n", + "display(pivoted_temp)\n", + "\n", + "# Extract and store temperature statistics per city using pandas SQL (grouped on the unique `city` key as well, so the selected `city` is never an arbitrary bare column)\n", + "stat_temp_per_city = sqldf('''\n", + " SELECT city, city_name, AVG(`temp.mean_celsius`) AS avg_temp, MAX(`temp.max_celsius`) AS max_temp, MIN(`temp.min_celsius`) AS min_temp\n", + " FROM both_cities_df\n", + " GROUP BY city, city_name\n", + " ''')\n", + "\n", + "# Display the temperature stats for both cities\n", + "display(stat_temp_per_city)" ] }, { @@ -319,48 +273,35 @@ { "cell_type": "code", "execution_count": null, - "id": "5baab98a", + "id": "83aed603", "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "import matplotlib.dates as mdates\n", - "import os\n", - "import sys\n", - "\n", - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "# Import the kelvin to celsius function\n", - "from my_package.util import kelvin_to_celsius\n", - "\n", - "# Defines the output folder for the figure, and makes it if is does not exsist\n", "output_folder = \"../data/output_fig\"\n", - "os.makedirs(output_folder, exist_ok=True) \n", + "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", "\n", - "# Converts to and make a new column with celsius temp, and not kelvin\n", - "city_1_df['temp.mean_celsius'] = kelvin_to_celsius(city_1_df['temp.mean'])\n", - "temp_city_1 = city_1_df['temp.mean_celsius']\n", - "precipitation_city_1 = city_1_df['precipitation.mean']\n", - "wind_city_1 = city_1_df['wind.mean']\n", + "temp_city_1 = sqldf('''SELECT `temp.mean_celsius` FROM both_cities_df WHERE city = \"city_1\"''').iloc[:,0] # sqldf returns a DataFrame; keep the single column as a 
Series\n", + "temp_city_2 = sqldf('''SELECT `temp.mean_celsius` FROM both_cities_df WHERE city = \"city_2\"''').iloc[:,0]\n", "\n", - "# Converts to and make a new column with celsius
temp, and not kelvin\n", - "city_2_df['temp.mean_celsius'] = kelvin_to_celsius(city_2_df['temp.mean'])\n", - "temp_city_2 = city_2_df['temp.mean_celsius']\n", - "precipitation_city_2 = city_2_df['precipitation.mean']\n", - "wind_city_2 = city_2_df['wind.mean']\n", + "# Because pandas sql returns the value as a dataframe, we need to get the actual value (first row, first column)\n", + "temp_mean_city_1 = sqldf('''SELECT avg_temp FROM stat_temp_per_city WHERE city = \"city_1\"''').iloc[0, 0]\n", + "temp_mean_city_2 = sqldf('''SELECT avg_temp FROM stat_temp_per_city WHERE city = \"city_2\"''').iloc[0, 0]\n", "\n", - "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", - "city_1_df['month_day'] = city_1_df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", - "city_2_df['month_day'] = city_2_df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "# Extract precipitation values for both cities\n", + "# Because pandas sql returns the value as a dataframe, we need to get the actual value (all rows, first column)\n", + "precipitation_city_1 = sqldf('''SELECT `precipitation.mean` FROM both_cities_df WHERE city = \"city_1\"''').iloc[:,0]\n", + "precipitation_city_2 = sqldf('''SELECT `precipitation.mean` FROM both_cities_df WHERE city = \"city_2\"''').iloc[:,0]\n", + "\n", + "wind_city_1 = sqldf('''SELECT `wind.mean` FROM both_cities_df WHERE city = \"city_1\"''').iloc[:,0]\n", + "wind_city_2 = sqldf('''SELECT `wind.mean` FROM both_cities_df WHERE city = \"city_2\"''').iloc[:,0]\n", "\n", - "x_axis = city_1_df['month_day']\n", + "x_axis = sqldf('''SELECT month_day FROM both_cities_df WHERE city = \"city_1\"''').iloc[:,0] # month_day is only created on both_cities_df; city_1_df no longer has that column\n", "\n", "fig, (ax1, ax3) = plt.subplots(2, 1, figsize = (15, 8), sharex=True)\n", "\n", "# Plot temperature on the primary y-axis\n", - "ax1.plot(x_axis, temp_city_1, 
color='#2E8B57', label=f'Temperature {city_1}')\n", - "ax1.plot(x_axis, temp_city_2, color='#FFD700', label=f'Temperature {city_2}')\n", + "ax1.plot(x_axis, temp_city_1, color='#008080',
label=f'Temperature {city_1}')\n", + "ax1.plot(x_axis, temp_city_2, color='#FFA500', label=f'Temperature {city_2}')\n", "# ax1.set_xlabel('Datetime')\n", "ax1.set_ylabel('Temperature (°C)', color='tab:red')\n", "ax1.tick_params(axis='y', labelcolor='tab:red')\n", @@ -368,14 +309,15 @@ "# Add marker at 0 temperature\n", "ax1.axhline(y=0, color='black', linewidth=1.5)\n", "\n", + "ax1.axhline(y=temp_mean_city_1, color='#008080', linestyle='dashed', alpha=0.7, label=f'Mean Temperature {city_1}')\n", + "ax1.axhline(y=temp_mean_city_2, color='#FFA500', linestyle='dashed', alpha=0.7, label=f'Mean Temperature {city_2}')\n", + "\n", "# Plot precipitation as bars on the secondary y-axis\n", "ax2 = ax1.twinx()\n", "\n", - "# ax2.bar(x_axis, precipitation_city_1, color='#2E8B57', alpha=0.5, width=1, label=f'Precipitation {city_1}')\n", - "# ax2.bar(x_axis, precipitation_city_2, color='#FFD700', alpha=0.5, width=1, label=f'Precipitation {city_2}')\n", - "\n", - "ax2.fill_between(x_axis, precipitation_city_1, color='green', alpha=0.3, label=f'{city_1} Total')\n", - "ax2.fill_between(x_axis, precipitation_city_2, color='gold', alpha=0.3, label=f'{city_2} Total')\n", + "# Fill between the precipitation, for an easier and more readable vizualisation\n", + "ax2.fill_between(x_axis, precipitation_city_1, color='#80C0C0', alpha=0.5, label=f'{city_1} Total', edgecolor = '#008080')\n", + "ax2.fill_between(x_axis, precipitation_city_2, color='#FFD280', alpha=0.5, label=f'{city_2} Total', edgecolor = '#FFA500')\n", "\n", "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n", "ax2.tick_params(axis='y', labelcolor='tab:blue')\n", @@ -384,8 +326,8 @@ "ax1.legend(loc='upper left')\n", "ax2.legend(loc='upper right')\n", "\n", - "ax3.plot(x_axis, wind_city_1, color='#2E8B57', label=f'Wind {city_1}')\n", - "ax3.plot(x_axis, wind_city_2, color='#FFD700', label=f'Wind {city_2}')\n", + "ax3.plot(x_axis, wind_city_1, color='#008080', label=f'Wind {city_1}')\n", + "ax3.plot(x_axis, 
wind_city_2, color='#FFA500', label=f'Wind {city_2}')\n", "# ax3.plot(x_axis, wind_speed, color='tab:purple', linestyle='dashed', label='Wind_speed')\n", "ax3.set_ylabel('Wind (m/s)')\n", "ax3.set_xlabel('Datetime')\n", @@ -399,12 +341,15 @@ "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", "\n", + "plt.title(f\"Statistic weather data for ({city_1} and {city_2})\")\n", "plt.tight_layout()\n", "\n", - "# Show the plot\n", - "plt.show()\n", + "# Save the plot to the data/output_fig folder\n", + "plot_path = os.path.join(output_folder, f\"mean_temp_plot_{city_1}_{city_2}.png\")\n", + "plt.savefig(plot_path) # Save the plot as a PNG file\n", "\n", - "# print(df['precipitation.max'].max())" + "# Show the plot\n", + "plt.show()" ] } ], diff --git a/notebooks/notebook_statistic_data.ipynb b/notebooks/notebook_statistic_data.ipynb index 7d943d9..6e70923 100644 --- a/notebooks/notebook_statistic_data.ipynb +++ b/notebooks/notebook_statistic_data.ipynb @@ -109,16 +109,10 @@ "outputs": [], "source": [ "import pandas as pd\n", + "from my_package.util import extract_city_data_stat\n", "\n", - "# Checks if the 'result' column is in the data\n", - "if 'result' in data:\n", - " # Normalize the json and store it as a dataframe for better readability\n", - " df = pd.json_normalize(data['result'])\n", - "\n", - " # Display the dataframe\n", - " display(df)\n", - "else:\n", - " print(\"'result' not in data\")" + "df = extract_city_data_stat(data)\n", + "display(df)" ] }, { @@ -137,11 +131,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Drop all columns that end with '...' 
using the filter function\n", - "df = df.drop(columns=df.filter(like='.p25').columns)\n", - "df = df.drop(columns=df.filter(like='.p75').columns)\n", - "df = df.drop(columns=df.filter(like='.st_dev').columns)\n", - "df = df.drop(columns=df.filter(like='.num').columns)\n", + "from my_package.util import clean_df\n", + "\n", + "# Cleans the data of unnecessary columns\n", + "df = clean_df(df)\n", "\n", "display(df)" ] @@ -150,9 +143,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Plotter temperatur\n", - "Denne koden plotter data basert på gjennomsnitts temperatur gjennom året. For å sikre lagring av de ulike kjøringene, vil grafen bli lagret i mappen \"../data/output_fig/mean_temp_plot_{city_name}.json\"\n", - "\n" + "### Viser temperaturen\n", + "Vi bruker pandas SQL for å hente ut ønsket temperatur fra statistic_data og lagrer den i en tabell.\n", + "\n", + "Ved hjelp av en pandas SQL setning kan vi hente og lagre gjennomsnitt, maksimalt og minste målte temperatur. Senere kan vi bare skrive en SELECT setning til denne variabelen."
] }, { @@ -161,10 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "import matplotlib.dates as mdates\n", - "import os\n", - "import sys\n", + "from pandasql import sqldf\n", "\n", "# Gets the absolute path to the src folder\n", "sys.path.append(os.path.abspath(\"../src\"))\n", @@ -172,47 +163,27 @@ "# Import the kelvin to celsius function\n", "from my_package.util import kelvin_to_celsius\n", "\n", - "output_folder = \"../data/output_fig\"\n", - "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", - "\n", - "# Converts to and make a new column with celsius temp, and not kelvin\n", "df['temp.mean_celsius'] = kelvin_to_celsius(df['temp.mean'])\n", - "temp = df['temp.mean_celsius']\n", - "\n", - "# Convert from day and month, to datetime\n", - "# df['date'] = pd.to_datetime(df[['month', 'day']].assign(year=2024))\n", + "df['temp.max_celsius'] = kelvin_to_celsius(df['temp.record_max'])\n", + "df['temp.min_celsius'] = kelvin_to_celsius(df['temp.record_min'])\n", "\n", "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", "\n", - "# Plot the graph of the mean temperature\n", - "plt.figure(figsize=(12, 6))\n", - "plt.plot(df['month_day'], temp)\n", - "\n", - "# Label for easier reading and understanding of the plot\n", - "plt.title(f\"Mean temp - statistic historical {city_name}\")\n", - "plt.xlabel(\"Date\")\n", - "plt.ylabel(\"Temperature (°C)\")\n", - "\n", - "# Add marker at 0 temperature\n", - "plt.axhline(y=0, color='black', linewidth=1.5)\n", - "\n", - "# Customize the x-axis to show ticks and labels only at the start of each month\n", - "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", - "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", - "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", + 
"temp_data = sqldf('''\n", + " SELECT month_day, `temp.mean_celsius` as temp\n", + " FROM df\n", + "''')\n", "\n", - "plt.xticks(rotation=45)\n", - "plt.yticks(range(-20, 30, 2))\n", - "plt.tight_layout()\n", - "plt.grid()\n", + "display(temp_data)\n", "\n", - "# Save the plot to the data/output_fig folder\n", - "plot_path = os.path.join(output_folder, f\"mean_temp_plot_{city_name}.png\")\n", - "plt.savefig(plot_path) # Save the plot as a PNG file\n", + "# Extract and stores temperatur data for each city using pandas sql\n", + "stat_temp = sqldf('''\n", + " SELECT AVG(`temp.mean_celsius`) AS avg_temp, MAX(`temp.max_celsius`) AS max_temp, MIN(`temp.min_celsius`) AS min_temp\n", + " FROM df\n", + " ''')\n", "\n", - "# Show the plot\n", - "plt.show()\n" + "display(stat_temp)" ] }, { @@ -231,27 +202,18 @@ "source": [ "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", - "import os\n", - "import sys\n", - "\n", - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", "\n", - "# Import the kelvin to celsius function\n", - "from my_package.util import kelvin_to_celsius\n", - "\n", - "# Defines the output folder for the figure, and makes it if is does not exsist\n", "output_folder = \"../data/output_fig\"\n", - "os.makedirs(output_folder, exist_ok=True) \n", + "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", + "\n", "\n", - "# Converts to and make a new column with celsius temp, and not kelvin\n", - "df['temp.mean_celsius'] = kelvin_to_celsius(df['temp.mean'])\n", "temp = df['temp.mean_celsius']\n", - "precipitation = df['precipitation.mean']\n", - "wind = df['wind.mean']\n", + "temp_mean = sqldf('''SELECT avg_temp FROM stat_temp''').iloc[0, 0]\n", "\n", - "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", - "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "# Extract 
precipitation and wind values\n", + "# Because pandas sql returns the value as a dataframe, we need to get the actual value (all rows, first column)\n", + "precipitation = sqldf('''SELECT `precipitation.mean` FROM df''').iloc[:,0]\n", + "wind = sqldf('''SELECT `wind.mean` FROM df''').iloc[:,0]\n", "\n", "x_axis = df['month_day']\n", "\n", @@ -265,6 +227,7 @@ "\n", "# Add marker at 0 temperature\n", "ax1.axhline(y=0, color='black', linewidth=1.5)\n", + "ax1.axhline(y=temp_mean, color='red', linestyle=\"dashed\")\n", "\n", "# Plot precipitation as bars on the secondary y-axis\n", "ax2 = ax1.twinx()\n", @@ -293,9 +256,7 @@ "plt.tight_layout()\n", "\n", "# Show the plot\n", - "plt.show()\n", - "\n", - "print(df['precipitation.max'].max())" + "plt.show()" ] }, { @@ -445,9 +406,9 @@ "df.loc[(df['temp.record_max_celsius'] > max_upper_limit) | (df['temp.record_max_celsius'] < max_lower_limit), 'temp.record_max_celsius'] = np.nan\n", "\n", "# Interpolate to replace NaN values with linear interpolation\n", - "df['temp.mean_celsius'] = df['temp.mean_celsius'].interpolate(method='linear')\n", - "df['temp.record_min_celsius'] = df['temp.record_min_celsius'].interpolate(method='linear')\n", - "df['temp.record_max_celsius'] = df['temp.record_max_celsius'].interpolate(method='linear')" + "df['temp.mean_celsius'] = df['temp.mean_celsius'].interpolate(method='linear', limit_direction='both')\n", + "df['temp.record_min_celsius'] = df['temp.record_min_celsius'].interpolate(method='linear', limit_direction='both')\n", + "df['temp.record_max_celsius'] = df['temp.record_max_celsius'].interpolate(method='linear', limit_direction='both')" ] }, {