From 704d63ffd053b409b377ebb3dba3176d5141497b Mon Sep 17 00:00:00 2001 From: toravest Date: Sun, 30 Mar 2025 12:29:31 +0200 Subject: [PATCH] rename notebook, check outliers, missing data (data cleaning) --- notebooks/notebook_statistic_data.ipynb | 581 ++++++++++++++++++++++++ notebooks/statistic_data_notebook.ipynb | 180 -------- 2 files changed, 581 insertions(+), 180 deletions(-) create mode 100644 notebooks/notebook_statistic_data.ipynb delete mode 100644 notebooks/statistic_data_notebook.ipynb diff --git a/notebooks/notebook_statistic_data.ipynb b/notebooks/notebook_statistic_data.ipynb new file mode 100644 index 0000000..836d891 --- /dev/null +++ b/notebooks/notebook_statistic_data.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook - Statistic data\n", + "Denne notebooken henter data fra en API som samler alle historiske data for ønsket sted, og regner ut statistiske verdier for alle dagene i året. Vi fjerner uønskede kolonner, utelukker ekstremverdier og visualiserer data gjennom plotter. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Velg et sted i Norge for å få statistisk data\n", + "\n", + "Denne API-en henter statistisk historisk data, altså statistisk data basert på de historiske dataene, ikke de reelle historiske målingene. \n", + "\n", + "Statistikken er basert på de historiske dataene totalt sett, ikke for hvert år."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "# Now we can import the function from the module\n", + "from my_package.year_data import fetch_data\n", + "\n", + "# User input the city, for the weather\n", + "city_name = input(\"Enter a city in Norway: \")\n", + "\n", + "# Transliterate Norwegian letters in both cases (e.g. 'Ålesund' -> 'Aalesund');\n", + "# str.replace leaves the name untouched when a letter is absent, so no check is needed\n", + "transliteration = {'æ': 'ae', 'ø': 'o', 'å': 'aa', 'Æ': 'Ae', 'Ø': 'O', 'Å': 'Aa'}\n", + "for norwegian_letter, ascii_form in transliteration.items():\n", + "    city_name = city_name.replace(norwegian_letter, ascii_form)\n", + "\n", + "data, folder = fetch_data(city_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lagre data i json-fil\n", + "\n", + "Skriv inn navn for til filen du vil lagre med dataen.\n", + "\n", + "Eks. test\n", + "Da vil filen lagres som data_**test**.json, i mappen \"../data/output_statistikk/data_{filnavn}.json\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "from my_package.write_data import write_data\n", + "\n", + "filename = input(\"Write filename: \")\n", + "\n", + "write_data(data, folder, filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lese fra fil\n", + "\n", + "Henter opp data lagret i filen over, og lagrer i en variabel."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.read_json(f'../data/output_statistikk/data_{filename}.json')\n", + "\n", + "# Display data\n", + "display(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lesbar data\n", + "Sørger for at dataen lagret over blir mer lesbar." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Checks if the 'result' column is in the data\n", + "if 'result' in data:\n", + " # Normalize the json and store it as a dataframe for better readability\n", + " df = pd.json_normalize(data['result'])\n", + "\n", + " # Display the dataframe\n", + " display(df)\n", + "else:\n", + " print(\"'result' not in data\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rydder i data\n", + "Fjerner alle kolonner vi ikke trenger, som standardavvik for alle kategorier for alle dager; vi kan regne ut et felles standardavvik ved å bruke statistics-modulen. \n", + "\n", + "Ettersom alle kategoriene har lik data, og vi vil fjerne noen av verdiene fra alle kategoriene, kan vi bruke filter-funksjonen til å filtrere ut kolonnene som inneholder f.eks. '.st_dev'. Dette gjør at kolonnene fjernes fra alle kategoriene på likt, og vi slipper å skrive alle flere ganger." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop all columns that contain '...' 
using the filter function\n", + "df = df.drop(columns=df.filter(like='.p25').columns)\n", + "df = df.drop(columns=df.filter(like='.p75').columns)\n", + "df = df.drop(columns=df.filter(like='.st_dev').columns)\n", + "df = df.drop(columns=df.filter(like='.num').columns)\n", + "\n", + "display(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotter temperatur\n", + "Denne koden plotter data basert på gjennomsnittstemperatur gjennom året. For å sikre lagring av de ulike kjøringene, vil grafen bli lagret i mappen \"../data/output_fig/mean_temp_plot_{city_name}.png\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import os\n", + "\n", + "output_folder = \"../data/output_fig\"\n", + "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", + "\n", + "# Convert the mean temperature from Kelvin to Celsius (0 °C = 273.15 K) in a new column\n", + "df['temp.mean_celsius'] = df['temp.mean'] - 273.15\n", + "temp = df['temp.mean_celsius']\n", + "\n", + "# Convert from day and month, to datetime\n", + "# df['date'] = pd.to_datetime(df[['month', 'day']].assign(year=2024))\n", + "\n", + "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", + "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "\n", + "# Plot the graph of the mean temperature\n", + "plt.figure(figsize=(12, 6))\n", + "plt.plot(df['month_day'], temp)\n", + "\n", + "# Label for easier reading and understanding of the plot\n", + "plt.title(f\"Mean temp - statistic historical {city_name}\")\n", + "plt.xlabel(\"Date\")\n", + "plt.ylabel(\"Temperature (°C)\")\n", + "\n", + "# Customize the x-axis to show ticks and labels only at the start of each month\n", + "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", + "# Format 
ticks to show abbreviated month names (e.g., Jan, Feb)\n", + "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", + "\n", + "plt.xticks(rotation=45)\n", + "plt.yticks(range(-20, 30, 2))\n", + "plt.tight_layout()\n", + "plt.grid()\n", + "\n", + "# Save the plot to the data/output_fig folder\n", + "plot_path = os.path.join(output_folder, f\"mean_temp_plot_{city_name}.png\")\n", + "plt.savefig(plot_path) # Save the plot as a PNG file\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotter data\n", + "Her plottes temperatur og regn på samme akse, med vind i en egen graf under, men de deler samme x-akse, som er month_day." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import os\n", + "\n", + "# Defines the output folder for the figure, and creates it if it does not exist\n", + "output_folder = \"../data/output_fig\"\n", + "os.makedirs(output_folder, exist_ok=True) \n", + "\n", + "# Convert the mean temperature from Kelvin to Celsius (0 °C = 273.15 K) in a new column\n", + "df['temp.mean_celsius'] = df['temp.mean'] - 273.15\n", + "temp = df['temp.mean_celsius']\n", + "precipitation = df['precipitation.mean']\n", + "wind = df['wind.mean']\n", + "\n", + "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", + "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "\n", + "x_axis = df['month_day']\n", + "\n", + "fig, (ax1, ax3) = plt.subplots(2, 1, figsize = (15, 8), sharex=True)\n", + "\n", + "# Plot temperature on the primary y-axis\n", + "ax1.plot(x_axis, temp, color='tab:red', label='Temperature (°C)')\n", + "# ax1.set_xlabel('Datetime')\n", + "ax1.set_ylabel('Temperature (°C)', color='tab:red')\n", + "ax1.tick_params(axis='y', labelcolor='tab:red')\n", +
"\n", + "# Plot precipitation as bars on the secondary y-axis\n", + "ax2 = ax1.twinx()\n", + "ax2.bar(x_axis, precipitation, color='tab:blue', alpha=0.5, width=1, label='Precipitation (mm)')\n", + "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n", + "ax2.tick_params(axis='y', labelcolor='tab:blue')\n", + "\n", + "ax1.grid(axis = 'x')\n", + "ax1.legend(loc='upper left')\n", + "ax2.legend(loc='upper right')\n", + "\n", + "ax3.plot(x_axis, wind, color='tab:purple', label='Wind (m/s)')\n", + "# ax3.plot(x_axis, wind_speed, color='tab:purple', linestyle='dashed', label='Wind_speed')\n", + "ax3.set_ylabel('Wind (m/s)')\n", + "ax3.set_xlabel('Datetime')\n", + "ax3.legend(loc='upper right')\n", + "\n", + "ax3.grid(axis = 'x')\n", + "\n", + "\n", + "# Customize the x-axis to show ticks and labels only at the start of each month\n", + "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", + "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", + "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", + "\n", + "plt.tight_layout()\n", + "\n", + "# Show the plot\n", + "plt.show()\n", + "\n", + "print(df['precipitation.max'].max())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualiserer målte temperaturer\n", + "\n", + "Ved hjelp av matplotlib visualiserer vi temperaturen målt for alle dagene.\n", + "\n", + "Forklaring til grafen:\n", + "- Grå graf: gjennomsnitt av alle målingene\n", + "- Rød graf: høyeste målte temperatur\n", + "- Blå graf: laveste målte temperatur" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "\n", + "# Convert mean, record max and record min from Kelvin to Celsius (0 °C = 273.15 K)\n", + "df['temp.mean_celsius'] = df['temp.mean'] - 273.15\n", + "temp_mean = df['temp.mean_celsius']\n", + "\n", + "df['temp.record_max_celsius'] = 
df['temp.record_max'] - 273.15\n", + "temp_record_max = df['temp.record_max_celsius']\n", + "\n", + "df['temp.record_min_celsius'] = df['temp.record_min'] - 273.15\n", + "temp_record_min = df['temp.record_min_celsius']\n", + "\n", + "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", + "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", + "\n", + "# Use the month_day column as values for the x_axis\n", + "x_axis = df['month_day']\n", + "\n", + "# Defines the height and width of the figure\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Plot the mean, record max and record min temperatures\n", + "plt.plot(x_axis, temp_mean, color='tab:gray', label='Mean temperatur')\n", + "plt.plot(x_axis, temp_record_max, color='tab:red', label = 'Max temperatur')\n", + "plt.plot(x_axis, temp_record_min, color='tab:blue', label = 'Min temperatur')\n", + "\n", + "\n", + "# Customize the x-axis to show ticks and labels only at the start of each month\n", + "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", + "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", + "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", + "\n", + "plt.tight_layout()\n", + "\n", + "# Plot title with city_name\n", + "plt.title(f'Temperatur {city_name}')\n", + "\n", + "# Add grid\n", + "plt.grid()\n", + "\n", + "# Show the label description\n", + "plt.legend(loc = 'upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sjekker uteliggere\n", + "Denne koden sjekker om det er noen uteliggere i de ulike temperatur grafene, altså om noen verdier ligger mer enn 3 standardavvik i fra gjennomsnittet."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import statistics\n", + "\n", + "# Ensure 'month_day' is set as the index\n", + "if 'month_day' in df.columns:\n", + " df.set_index('month_day', inplace=True)\n", + "else:\n", + " print('month_day not in')\n", + "\n", + "# Extract temperature columns\n", + "temp_mean = df['temp.mean_celsius']\n", + "temp_record_min = df['temp.record_min_celsius']\n", + "temp_record_max = df['temp.record_max_celsius']\n", + "\n", + "# Calculate means\n", + "temp_mean_mean = temp_mean.mean()\n", + "temp_record_min_mean = temp_record_min.mean()\n", + "temp_record_max_mean = temp_record_max.mean()\n", + "\n", + "# Calculate standard deviations\n", + "temp_mean_stdev = statistics.stdev(temp_mean)\n", + "temp_record_min_stdev = statistics.stdev(temp_record_min)\n", + "temp_record_max_stdev = statistics.stdev(temp_record_max)\n", + "\n", + "# Calculate 3 standard deviation limits\n", + "mean_lower_limit = temp_mean_mean - (temp_mean_stdev * 3)\n", + "mean_upper_limit = temp_mean_mean + (temp_mean_stdev * 3)\n", + "\n", + "min_lower_limit = temp_record_min_mean - (temp_record_min_stdev * 3)\n", + "min_upper_limit = temp_record_min_mean + (temp_record_min_stdev * 3)\n", + "\n", + "max_lower_limit = temp_record_max_mean - (temp_record_max_stdev * 3)\n", + "max_upper_limit = temp_record_max_mean + (temp_record_max_stdev * 3)\n", + "\n", + "# Identify outliers\n", + "mean_outliers = df.loc[(df['temp.mean_celsius'] > mean_upper_limit) | (df['temp.mean_celsius'] < mean_lower_limit), 'temp.mean_celsius']\n", + "min_outliers = df.loc[(df['temp.record_min_celsius'] > min_upper_limit) | (df['temp.record_min_celsius'] < min_lower_limit), 'temp.record_min_celsius']\n", + "max_outliers = df.loc[(df['temp.record_max_celsius'] > max_upper_limit) | (df['temp.record_max_celsius'] < max_lower_limit), 'temp.record_max_celsius']\n", + "\n", + "# Print the outliers\n", + 
"print(\"Outliers in temp.mean_celsius:\")\n", + "print(mean_outliers)\n", + "\n", + "print(\"Outliers in temp.record_min_celsius:\")\n", + "print(min_outliers)\n", + "\n", + "print(\"Outliers in temp.record_max_celsius:\")\n", + "print(max_outliers)\n", + "\n", + "# Replace outliers with NaN\n", + "df.loc[(df['temp.mean_celsius'] > mean_upper_limit) | (df['temp.mean_celsius'] < mean_lower_limit), 'temp.mean_celsius'] = np.nan\n", + "df.loc[(df['temp.record_min_celsius'] > min_upper_limit) | (df['temp.record_min_celsius'] < min_lower_limit), 'temp.record_min_celsius'] = np.nan\n", + "df.loc[(df['temp.record_max_celsius'] > max_upper_limit) | (df['temp.record_max_celsius'] < max_lower_limit), 'temp.record_max_celsius'] = np.nan\n", + "\n", + "# Interpolate to replace NaN values with linear interpolation\n", + "df['temp.mean_celsius'] = df['temp.mean_celsius'].interpolate(method='linear')\n", + "df['temp.record_min_celsius'] = df['temp.record_min_celsius'].interpolate(method='linear')\n", + "df['temp.record_max_celsius'] = df['temp.record_max_celsius'].interpolate(method='linear')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualiserer temperatur etter endringer\n", + "Hvis det er uteliggere i dataen, som skal ha blitt endret, vil denne plotten vise en mer riktig og \"feilfri\" plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "\n", + "# Ensure 'month_day' is set as the index for proper plotting\n", + "if 'month_day' in df.columns:\n", + " df.set_index('month_day', inplace=True)\n", + "\n", + "# Extract updated temperature columns\n", + "temp_mean = df['temp.mean_celsius']\n", + "temp_record_max = df['temp.record_max_celsius']\n", + "temp_record_min = df['temp.record_min_celsius']\n", + "\n", + "# Plot the updated temperature data\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Plot mean, max, and min temperatures\n", + "plt.plot(temp_mean.index, temp_mean, color='tab:gray', label='Mean Temperature')\n", + "plt.plot(temp_record_max.index, temp_record_max, color='tab:red', label='Max Temperature')\n", + "plt.plot(temp_record_min.index, temp_record_min, color='tab:blue', label='Min Temperature')\n", + "\n", + "# Customize the x-axis to show ticks and labels only at the start of each month\n", + "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", + "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) # Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", + "\n", + "# Add labels, title, and legend\n", + "plt.xlabel('Month-Day')\n", + "plt.ylabel('Temperature (°C)')\n", + "plt.title(f'Temperature Data for {city_name}')\n", + "plt.legend(loc='upper right')\n", + "\n", + "# Add grid for better readability\n", + "plt.grid()\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rekorder\n", + "\n", + "Denne funksjonen regner ut ulike rekorder for året, for angitt sted." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "from my_package.get_record import get_records\n", + "\n", + "summary_df, filename, folder = get_records(df, city_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Skriver dataen til fil\n", + "Lagrer rekord-dataen i en fil, med stedsnavn." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "from my_package.write_data import write_data\n", + "# makes the data 'json-compatible'\n", + "json_data = summary_df.to_dict(orient=\"records\")\n", + "\n", + "write_data(json_data, folder, filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Leser fra fil, og printer data\n", + "Denne funksjonen henter rekordene fra filen den ble skrevet til, og displayer de som en fin lettlest tabell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import json\n", + "\n", + "# Reads data from file and store it\n", + "with open(f\"../data/output_record/data_{filename}.json\", \"r\", encoding=\"utf-8\") as file:\n", + " data = json.load(file)\n", + "\n", + "# Normalize the data for better readability\n", + "df = pd.json_normalize(data)\n", + "\n", + "\n", + "# Displays the dataframe\n", + "display(df)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/statistic_data_notebook.ipynb b/notebooks/statistic_data_notebook.ipynb deleted file mode 100644 index e4b10d1..0000000 --- a/notebooks/statistic_data_notebook.ipynb +++ /dev/null @@ -1,180 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Velg et sted i Norge å få statistisk data\n", - "\n", - "Denne API-en henter statistisk historisk data, herunder, statistisk data basert på de historiske dataene, ikke reele statistisk historisk. \n", - "\n", - "Statistikken er basert på de historiske datane total sett, ikke for hvert år." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "# Now we can import the fucntion from the module\n", - "from my_package.year_data import fetch_data\n", - "\n", - "# User input the city, for the weather\n", - "city_name = input(\"Enter a city in Norway: \")\n", - "\n", - "data, folder = fetch_data(city_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Lagre data i json-fil\n", - "\n", - "Skriv inn navn for til filen du vil lagre med dataen.\n", - "\n", - "Eks. test\n", - "Da vil filen lagres som data_**test**.json, i mappen \"../data/output_statistikk/data_{filnavn}.json\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "from my_package.write_data import write_data\n", - "\n", - "filename = input(\"Write filename: \")\n", - "\n", - "write_data(data, folder, filename)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Lese fra fil\n", - "\n", - "Henter opp data lagret i filen, lagd over, og skriver ut lesbart ved hjelp av pandas" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "data = pd.read_json(f'../data/output_statistikk/data_{filename}.json')\n", - "\n", - "display(data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "if 'result' in data:\n", - " df = pd.json_normalize(data['result'])\n", - "\n", - " display(df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Plotter data\n", 
- "Denne koden plotter data basert på gjennomsnitts temperatur gjennom året. For å sikre lagring av de ulike kjøringene, vil grafen bli lagret i mappen \"../data/output_fig/mean_temp_plot_{city_name}.json\"\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import matplotlib.dates as mdates\n", - "import os\n", - "\n", - "output_folder = \"../data/output_fig\"\n", - "os.makedirs(output_folder, exist_ok=True) # Create the folder if it doesn't exist\n", - "\n", - "# Converts to and make a new column with celsius temp, and not kelvin\n", - "df['temp.mean_celsius'] = df['temp.mean'] - 273.15\n", - "temp = df['temp.mean_celsius']\n", - "\n", - "# Convert from day and month, to datetime\n", - "# df['date'] = pd.to_datetime(df[['month', 'day']].assign(year=2024))\n", - "\n", - "# Create a new column that concatenates month and day (e.g., \"03-01\" for March 1)\n", - "df['month_day'] = df[['month', 'day']].apply(lambda x: f\"{x['month']:02d}-{x['day']:02d}\",axis=1)\n", - "\n", - "# Plot the graph of the mean temperature\n", - "plt.figure(figsize=(12, 6))\n", - "plt.plot(df['month_day'], temp)\n", - "\n", - "# Label for easier reading and understanding of the plot\n", - "plt.title(f\"Mean temp - statistic historical {city_name}\")\n", - "plt.xlabel(\"Date\")\n", - "plt.ylabel(\"Temperature (°C)\")\n", - "\n", - "# Customize the x-axis to show ticks and labels only at the start of each month\n", - "plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n", - "# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n", - "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) \n", - "\n", - "plt.xticks(rotation=45)\n", - "plt.yticks(range(-20, 30, 2))\n", - "plt.tight_layout()\n", - "plt.grid()\n", - "\n", - "# Save the plot to the data/output_fig folder\n", - "plot_path = os.path.join(output_folder, f\"mean_temp_plot_{city_name}.png\")\n", - 
"plt.savefig(plot_path) # Save the plot as a PNG file\n", - "\n", - "# Show the plot\n", - "plt.show()\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}