diff --git a/notebooks/get_data_notebook.ipynb b/notebooks/get_data_notebook.ipynb deleted file mode 100644 index c3b6ad0..0000000 --- a/notebooks/get_data_notebook.ipynb +++ /dev/null @@ -1,548 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Velg start dato og sluttdato\n", - "\n", - "For å kunne hente data og gjøre en analyse trenger programmet å vite hvilken periode du vil hente ut for.\n", - "\n", - "Dataen skrives inn slik: (yyyy, mm, dd, hh, mm)\n", - "Her følger et eksempel: \n", - "|Hva|Hvordan|Eksempel|\n", - "|:---|:---:|:---:|\n", - "|år|yyyy|2025|\n", - "|måned|mm|03| \n", - "|dato|dd|01| \n", - "|time|hh|12| \n", - "|minutt|mm|00| \n", - "\n", - "Denne dataen skrives da inn på følgende hvis: (2025, 03, 01, 12, 00)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Start date => unix timestamp: 1742202600\n", - "End date => unix timestamp: 1742548200\n", - "Unix timestamp => start date: 2025-03-17 10:10:00\n", - "Unix timestamp => end date: 2025-03-21 10:10:00\n" - ] - } - ], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "# Now we can import the fucntion from the module\n", - "from my_package.date_to_unix import get_unix_timestamp\n", - "from my_package.date_to_unix import from_unix_timestamp\n", - "\n", - "# Runs the function and store the data\n", - "unix_start_date, unix_end_date = get_unix_timestamp()\n", - "\n", - "# Prints the unix_timestamp\n", - "print(\"Start date => unix timestamp:\", unix_start_date)\n", - "print(\"End date => unix timestamp:\", unix_end_date)\n", - "\n", - "# Run the function to convert from unix_timestamp to date, and store the variables\n", - "start_date, end_date = from_unix_timestamp(unix_start_date, unix_end_date)\n", - "\n", - "# prints the 
date\n", - "print(\"Unix timestamp => start date:\", start_date)\n", - "print(\"Unix timestamp => end date:\", end_date)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Velg et sted i Norge og få data\n", - "\n", - "Skriv inn et sted du ønsker data fra, foreløpig er det begrenset til Norge\n", - "\n", - "Programmet vil deretter hente data å lagre det i en json fil" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data fetch: ok\n" - ] - } - ], - "source": [ - "import sys\n", - "import os\n", - "\n", - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "# Now we can import the fucntion from the module\n", - "from my_package.fetch_data import fetch_data\n", - "\n", - "# User input the city, for the weather\n", - "city_name = input(\"Enter a city in Norway: \")\n", - "\n", - "data = fetch_data(unix_start_date, unix_end_date, city_name)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Lagre data i en json-fil\n", - "\n", - "Skriv inn navn for til filen du vil lagre med dataen.\n", - "\n", - "Eks. 
test\n", - "Da vil filen lagres som data_**test**.json, i mappen \"../data/output_stedsnavn/data_{filnavn}.json\"\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data has been written to /Users/toravestlund/Documents/ITBAITBEDR/TDT4114 - Anvendt programmering/anvendt_mappe/data/output_stedsdata/data_test6.json\n" - ] - } - ], - "source": [ - "# Gets the absolute path to the src folder\n", - "sys.path.append(os.path.abspath(\"../src\"))\n", - "\n", - "from my_package.write_data import write_data\n", - "\n", - "filename = input(\"Write filename: \")\n", - "\n", - "write_data(data, filename)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Lese fra fil\n", - "\n", - "Henter opp data lagret i filen, lagd over, og skriver ut lesbart ved hjelp av pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " message cod city_id calctime cnt \\\n", - "0 Count: 96 200 3133880 0.021173 96 \n", - "1 Count: 96 200 3133880 0.021173 96 \n", - "2 Count: 96 200 3133880 0.021173 96 \n", - "3 Count: 96 200 3133880 0.021173 96 \n", - "4 Count: 96 200 3133880 0.021173 96 \n", - ".. ... ... ... ... ... \n", - "91 Count: 96 200 3133880 0.021173 96 \n", - "92 Count: 96 200 3133880 0.021173 96 \n", - "93 Count: 96 200 3133880 0.021173 96 \n", - "94 Count: 96 200 3133880 0.021173 96 \n", - "95 Count: 96 200 3133880 0.021173 96 \n", - "\n", - " list \n", - "0 {'dt': 1742205600, 'main': {'temp': 1.98, 'fee... \n", - "1 {'dt': 1742209200, 'main': {'temp': 3.05, 'fee... \n", - "2 {'dt': 1742212800, 'main': {'temp': 3.6, 'feel... \n", - "3 {'dt': 1742216400, 'main': {'temp': 4.16, 'fee... \n", - "4 {'dt': 1742220000, 'main': {'temp': 4.11, 'fee... \n", - ".. ... \n", - "91 {'dt': 1742533200, 'main': {'temp': -0.24, 'fe... 
\n", - "92 {'dt': 1742536800, 'main': {'temp': -0.24, 'fe... \n", - "93 {'dt': 1742540400, 'main': {'temp': 0.62, 'fee... \n", - "94 {'dt': 1742544000, 'main': {'temp': 2.18, 'fee... \n", - "95 {'dt': 1742547600, 'main': {'temp': 5.03, 'fee... \n", - "\n", - "[96 rows x 6 columns]\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "data = pd.read_json(f'../data/output_stedsdata/data_{filename}.json')\n", - "\n", - "print(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
main.tempmain.feels_likemain.pressuremain.humiditymain.temp_minmain.temp_maxwind.speedwind.degwind.gustclouds.allrain.1h
dt
2025-03-17 10:00:001.980.111021921.072.771.792033.581000.36
2025-03-17 11:00:003.050.841021932.733.332.242255.361000.79
2025-03-17 12:00:003.601.491021913.033.882.242484.021001.38
2025-03-17 13:00:004.161.751021923.844.442.682708.051000.16
2025-03-17 14:00:004.110.751021893.885.034.022938.051000.14
....................................
2025-03-21 05:00:00-0.24-2.29102491-1.160.551.671221.8642NaN
2025-03-21 06:00:00-0.24-2.14102490-1.160.551.571361.6744NaN
2025-03-21 07:00:000.62-0.97102589-0.602.031.451251.7797NaN
2025-03-21 08:00:002.180.781025922.183.031.47941.9988NaN
2025-03-21 09:00:005.033.851025785.035.031.60852.2967NaN
\n", - "

96 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " main.temp main.feels_like main.pressure main.humidity \\\n", - "dt \n", - "2025-03-17 10:00:00 1.98 0.11 1021 92 \n", - "2025-03-17 11:00:00 3.05 0.84 1021 93 \n", - "2025-03-17 12:00:00 3.60 1.49 1021 91 \n", - "2025-03-17 13:00:00 4.16 1.75 1021 92 \n", - "2025-03-17 14:00:00 4.11 0.75 1021 89 \n", - "... ... ... ... ... \n", - "2025-03-21 05:00:00 -0.24 -2.29 1024 91 \n", - "2025-03-21 06:00:00 -0.24 -2.14 1024 90 \n", - "2025-03-21 07:00:00 0.62 -0.97 1025 89 \n", - "2025-03-21 08:00:00 2.18 0.78 1025 92 \n", - "2025-03-21 09:00:00 5.03 3.85 1025 78 \n", - "\n", - " main.temp_min main.temp_max wind.speed wind.deg \\\n", - "dt \n", - "2025-03-17 10:00:00 1.07 2.77 1.79 203 \n", - "2025-03-17 11:00:00 2.73 3.33 2.24 225 \n", - "2025-03-17 12:00:00 3.03 3.88 2.24 248 \n", - "2025-03-17 13:00:00 3.84 4.44 2.68 270 \n", - "2025-03-17 14:00:00 3.88 5.03 4.02 293 \n", - "... ... ... ... ... \n", - "2025-03-21 05:00:00 -1.16 0.55 1.67 122 \n", - "2025-03-21 06:00:00 -1.16 0.55 1.57 136 \n", - "2025-03-21 07:00:00 -0.60 2.03 1.45 125 \n", - "2025-03-21 08:00:00 2.18 3.03 1.47 94 \n", - "2025-03-21 09:00:00 5.03 5.03 1.60 85 \n", - "\n", - " wind.gust clouds.all rain.1h \n", - "dt \n", - "2025-03-17 10:00:00 3.58 100 0.36 \n", - "2025-03-17 11:00:00 5.36 100 0.79 \n", - "2025-03-17 12:00:00 4.02 100 1.38 \n", - "2025-03-17 13:00:00 8.05 100 0.16 \n", - "2025-03-17 14:00:00 8.05 100 0.14 \n", - "... ... ... ... 
\n", - "2025-03-21 05:00:00 1.86 42 NaN \n", - "2025-03-21 06:00:00 1.67 44 NaN \n", - "2025-03-21 07:00:00 1.77 97 NaN \n", - "2025-03-21 08:00:00 1.99 88 NaN \n", - "2025-03-21 09:00:00 2.29 67 NaN \n", - "\n", - "[96 rows x 11 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "data = pd.read_json(f'../data/output_stedsdata/data_{filename}.json')\n", - "\n", - "if 'list' in data:\n", - " df = pd.json_normalize(data['list'])\n", - "\n", - " # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n", - " df = df.drop_duplicates(subset=['dt'])\n", - "\n", - " # The weather column dosnt have any releated information, therefor we delete it\n", - " df = df.drop(columns=\"weather\")\n", - "\n", - " # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n", - " df['dt'] = pd.to_datetime(df['dt'], unit='s')\n", - " df.set_index('dt', inplace=True)\n", - " \n", - "\n", - " \n", - "\n", - " # Ensure the DataFrame is displayed correctly\n", - " display(df)\n", - "\n", - " # # Extract main values\n", - " # temp = df['main.temp']\n", - " # humidity = df['main.humidity']\n", - "\n", - " # # Extract wind values\n", - " # w_speed = df['wind.speed']\n", - "\n", - " # # Extract other variables\n", - " # clouds = df['clouds.all']\n", - "\n", - " # try:\n", - " # rain = df['rain.1h']\n", - " # except KeyError:\n", - " # print(\"'Rain' is not present in the JSON file.\")\n", - "\n", - " # try:\n", - " # snow = df['snow.1h']\n", - " # except KeyError:\n", - " # print(\"'Snow' is not present in the JSON file.\")\n", - "\n", - " # # Print the average temperature\n", - " # print('Gjennomsnitts temperatur: ', temp.mean().round(2))\n", - "\n", - " # Display the temperature column\n", - " # display(temp)\n", - "else:\n", - " print(\"The 'list' key is not present in the JSON file.\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# \"komprimere oversikten over\"\n", - "# Som i å, finne gjennomsnitt av alle aktuelle data, \n", - "# høyeste, laveste (spesielt temp) i gitte periode" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/notebook_one_day_data.ipynb b/notebooks/notebook_one_day_data.ipynb index d2972f8..1083d65 100644 --- a/notebooks/notebook_one_day_data.ipynb +++ b/notebooks/notebook_one_day_data.ipynb @@ -182,7 +182,9 @@ "metadata": {}, "source": [ "### Viser temperaturen\n", - "Regner ut gjennomsnittst-temperatur ved hjelp av innebygde funksjoner. Finner også høyeste og laveste målte temperatur." + "Regner ut gjennomsnittst-temperatur ved hjelp av innebygde funksjoner. Finner også høyeste og laveste målte temperatur.\n", + "\n", + "Plotter temperaturen ved hjelp av matplotlib." 
] }, { @@ -191,6 +193,9 @@ "metadata": {}, "outputs": [], "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "\n", "# Stores the temperature values\n", "temp = df['main.temp']\n", "\n", @@ -204,7 +209,38 @@ "min_temp = df['main.temp'].min().round(2)\n", "\n", "print(\"Highest temperature:\", max_temp)\n", - "print(\"Lowest temperature:\", min_temp)" + "print(\"Lowest temperature:\", min_temp)\n", + "\n", + "# Set the x_axis to the index, which means the time\n", + "x_axis = df.index\n", + "\n", + "# Choose the width and height of the plot\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Plotting temperatur\n", + "plt.plot(x_axis, temp, color='tab:red', label='Temperatur')\n", + "\n", + "# Get the current axis, and store it as ax\n", + "ax = plt.gca()\n", + "\n", + "# Customize the x-axis to show ticks for each hour\n", + "ax.xaxis.set_major_locator(mdates.HourLocator(interval=1)) # Tick marks for every hour\n", + "ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M')) # Format as \"Hour:Minute\"\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Add title for the plot, with city_name and date\n", + "plt.title(f'Temperatur {city_name}, ({date})')\n", + "\n", + "# Shows a grid\n", + "plt.grid()\n", + "\n", + "# Show the label-description\n", + "plt.legend(loc = 'upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()" ] }, { @@ -266,7 +302,10 @@ "ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))\n", "\n", "# Add the label-desciption\n", - "plt.legend()\n", + "plt.legend(loc = 'upper right')\n", + "\n", + "# Add title to the plot, with city_name and date\n", + "plt.title(f'Precipitation {city_name}, ({date})')\n", "\n", "# Shows the plot\n", "plt.show()" @@ -405,7 +444,6 @@ "# Two vertically stacked axis, (2 rows, 1 column), width and height of the figure, and the axis share the same x_axis\n", "fig, (ax1, ax3) = plt.subplots(2, 1,figsize=(15, 8), sharex=True)\n", 
"\n", - "\n", "# Set the title for the diagram, above the first axis, with city_name and input_date\n", "ax1.set_title(f'Weather data for {city_name} ({date}) ')\n", "\n", @@ -429,7 +467,6 @@ "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n", "ax2.tick_params(axis='y', labelcolor='tab:blue')\n", "\n", - "\n", "# Format the x-axis to show all hours, in the format \"HH:MM\"\n", "ax1.xaxis.set_major_locator(mdates.HourLocator()) \n", "ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))\n", @@ -441,7 +478,6 @@ "# Add grid, but only vertically\n", "ax1.grid(axis = 'x')\n", "\n", - "\n", "# Plot the wind at the second x-axis (the axis below)\n", "ax3.plot(x_axis, wind_gust, color='tab:purple', label='Wind_gust')\n", "ax3.plot(x_axis, wind_speed, color='tab:purple', linestyle='dashed', label='Wind_speed')\n", @@ -467,7 +503,6 @@ "plot_path = os.path.join(output_folder, f\"weather_data_plot{city_name}.png\")\n", "plt.savefig(plot_path) # Save the plot as a PNG file\n", "\n", - "\n", "# Show the plot\n", "plt.show()" ] diff --git a/notebooks/notebook_one_week_data.ipynb b/notebooks/notebook_one_week_data.ipynb new file mode 100644 index 0000000..5d43ebc --- /dev/null +++ b/notebooks/notebook_one_week_data.ipynb @@ -0,0 +1,540 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook - One week data\n", + "Denne notebooken henter data fra ønsket periode (inntil 7-dager) og sted, skriver til fil. Visualiserer manglende verdier, retter opp manglende verdier, og visualisere og lagrer data fra plot." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Velg start dato og sluttdato\n", + "\n", + "For å kunne hente data og gjøre en analyse trenger programmet å vite hvilken periode du vil hente ut for.\n", + "\n", + "Dataen skrives inn slik: (yyyy, mm, dd, hh, mm)\n", + "Her følger et eksempel: \n", + "|Hva|Hvordan|Eksempel|\n", + "|:---|:---:|:---:|\n", + "|år|yyyy|2025|\n", + "|måned|mm|03| \n", + "|dato|dd|01| \n", + "|time|hh|12| \n", + "|minutt|mm|00| \n", + "\n", + "Denne dataen skrives da inn på følgende vis: (2025, 03, 01, 12, 00)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "# Now we can import the functions from the module\n", + "from my_package.date_to_unix import get_unix_timestamp\n", + "from my_package.date_to_unix import from_unix_timestamp\n", + "\n", + "# Runs the function and store the data\n", + "unix_start_date, unix_end_date = get_unix_timestamp()\n", + "\n", + "# Prints the unix_timestamp\n", + "print(\"Start date => unix timestamp:\", unix_start_date)\n", + "print(\"End date => unix timestamp:\", unix_end_date)\n", + "\n", + "# Run the function to convert from unix_timestamp to date, and store the variables\n", + "start_date, end_date = from_unix_timestamp(unix_start_date, unix_end_date)\n", + "\n", + "# Prints the date\n", + "print(\"Unix timestamp => start date:\", start_date)\n", + "print(\"Unix timestamp => end date:\", end_date)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Velg et sted i Norge og få data\n", + "\n", + "Skriv inn et sted du ønsker data fra, foreløpig er det begrenset til Norge\n", + "\n", + "Programmet vil deretter hente data og lagre det i en json fil" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "# Now we can import the fucntion from the module\n", + "from my_package.fetch_data import fetch_data\n", + "\n", + "# User input the city, for the weather\n", + "city_name = input(\"Enter a city in Norway: \")\n", + "\n", + "# Stores the values in the variables\n", + "data, folder = fetch_data(unix_start_date, unix_end_date, city_name)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lagre data i en json-fil\n", + "\n", + "Skriv inn navn for til filen du vil lagre med dataen.\n", + "\n", + "Eks. test\n", + "Da vil filen lagres som data_**test**.json, i mappen \"../data/output_stedsnavn/data_{filnavn}.json\"\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Gets the absolute path to the src folder\n", + "sys.path.append(os.path.abspath(\"../src\"))\n", + "\n", + "from my_package.write_data import write_data\n", + "\n", + "# User chose the name for the file\n", + "filename = input(\"Write filename: \")\n", + "\n", + "# Write the data, with the choosen filename\n", + "write_data(data, folder, filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lese fra fil\n", + "\n", + "Henter opp data lagret i filen, lagd over, og skriver ut lesbart ved hjelp av pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Read json-file using pandas\n", + "data = pd.read_json(f'../data/output_stedsnavn/data_{filename}.json')\n", + "\n", + "# Display the data\n", + "display(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rensking av riktig data\n", + "Vi går inn i 'list' for å finne den relevante informasjonen, og ikke 
bare meta-informasjon.\n", + "\n", + "Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Goes into the 'list' to get the needed and relevant information\n", + "if 'list' in data:\n", + " # Normalize the json, for better readability\n", + " df = pd.json_normalize(data['list'])\n", + "\n", + " # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n", + " df = df.drop_duplicates(subset=['dt'])\n", + "\n", + " # The weather column does not have any releated information, therefor we delete it\n", + " df = df.drop(columns=\"weather\")\n", + "\n", + " # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n", + " df['dt'] = pd.to_datetime(df['dt'], unit='s')\n", + " df.set_index('dt', inplace=True)\n", + "\n", + " # Display the datafram, with the changes\n", + " display(df)\n", + "else:\n", + " print(\"The 'list' key is not present in the JSON file.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Viser temperaturen\n", + "Regner ut gjennomsnittst-temperatur ved hjelp av innebygde funksjoner. Finner også høyeste og laveste målte temperatur.\n", + "\n", + "Plotter temperaturen for perioden." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "\n", + "# Extract main values\n", + "temp = df['main.temp']\n", + "temp_mean = temp.mean().round(2)\n", + "temp_max = temp.max().round(2)\n", + "temp_min = temp.min().round(2)\n", + "\n", + "# Print the average, highest and lowest temperature\n", + "print(f'Mean temperatur: {temp_mean}')\n", + "print(f'Highest temperatur: {temp_max}')\n", + "print(f'Lowest temperatur: {temp_min}')\n", + "\n", + "# Set the x_axis to the index, which means the time\n", + "x_axis = df.index\n", + "\n", + "# Choose the width and height of the plot\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Plotting temperatur\n", + "plt.plot(x_axis, temp, color='tab:red', label='Temperatur')\n", + "\n", + "# Get the current axis, and store it as ax\n", + "ax = plt.gca()\n", + "\n", + "# Customize the x-axis to show a tick every 12 hours\n", + "ax.xaxis.set_major_locator(mdates.HourLocator(interval=12)) # Tick marks every 12 hours\n", + "ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H')) # Format as \"Day Month Hour\"\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Add title for the plot, with city_name and start to end date\n", + "plt.title(f'Temperatur {city_name}, from ({start_date}) to ({end_date})')\n", + "\n", + "# Shows a grid\n", + "plt.grid()\n", + "\n", + "# Show the label-description\n", + "plt.legend(loc = 'upper right')\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualiserer nedbør\n", + "Ved hjelp av matplotlib visualiserer vi nedbør for ønsket periode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import numpy as np\n", + "\n", + "x_axis = df.index\n", + "\n", + "# The data only includes 'rain.1h' when it has rained in the period.\n", + "# If the column is missing, add it filled with NaN so it can still be plotted.\n", + "if 'rain.1h' not in df.columns:\n", + "    print(\"'Rain' is not present in the JSON file.\")\n", + "    df['rain.1h'] = np.nan\n", + "\n", + "# Always read rain from the current dataframe, never from an earlier run\n", + "rain = df['rain.1h']\n", + "\n", + "# The data only includes 'snow.1h' when it has snowed in the period.\n", + "# If the column is missing, add it filled with NaN so it can still be plotted.\n", + "if 'snow.1h' not in df.columns:\n", + "    print(\"'Snow' is not present in the JSON file.\")\n", + "    df['snow.1h'] = np.nan\n", + "\n", + "# Always read snow from the current dataframe, never from an earlier run\n", + "snow = df['snow.1h']\n", + "\n", + "# Choose the width and height of the plot\n", + "plt.figure(figsize=(15, 6))\n", + "\n", + "# Plot the precipitation as bars.\n", + "# Both columns are guaranteed to exist after the checks above, so no\n", + "# try/except is needed. NaN values simply draw no bar.\n", + "\n", + "# Add rain\n", + "plt.bar(x_axis, rain, width=0.02, alpha=0.5, color='tab:blue', label='rain')\n", + "\n", + "# Add snow\n", + "plt.bar(x_axis, snow, width=0.02, alpha=0.5, color='tab:grey', label='snow')\n", + "\n", + "# Get the current axis, and store it as ax\n", + "ax = plt.gca()\n", + "\n", + "# Customize the x-axis to show a tick every 12 hours\n", + "ax.xaxis.set_major_locator(mdates.HourLocator(interval=12)) # Tick marks every 12 hours\n", + "ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H')) # Format as \"Day Month Hour\"\n", + "\n", + "# Add the label-description\n", + "plt.legend(loc = 'upper right')\n", + "\n", + "# Add title to the plot, with city_name and start to end date\n", + "plt.title(f'Precipitation {city_name}, from ({start_date}) to 
({end_date})')\n", + "\n", + "# Shows the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vise dataframe, med nye kolonner\n", + "Hvis dataframen ikke inneholdt 'rain.1h' eller 'snow.1h', skal de nå ha blitt lagt til med 'NaN' verdier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display df, to see if 'rain.1h' and 'snow.1h' were added with NaN values\n", + "display(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sjekk for manglende verdier\n", + "Missingno sjekker og visualiserer manglende verdier, slik at det blir lettere å se hvilke kolonner feilen ligger i. \n", + "\n", + "Hvis det blir \"hull\" i en søyle, tyder det på manglende verdier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import missingno as msno\n", + "\n", + "# Checks for and displays missing values\n", + "msno.matrix(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Endre manglende verdier\n", + "I de fleste tilfeller virker dataene å være tilnærmet \"perfekte\", men de inkluderer bare snø og regn dersom det er snø eller regn. Derfor vil vi få NaN verdier i de målingene det ikke har regnet/snødd. \n", + "\n", + "Under sjekker vi først om regn eller snø er i målingen, og hvis den er, bytter vi ut NaN med 0." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If rain is stored, fill the NaN with 0\n", + "try: \n", + " df['rain.1h'] = df['rain.1h'].fillna(0)\n", + "except KeyError:\n", + " print(\"['rain.1h'], not in df\")\n", + "\n", + "# If snow is stored, fill the NaN with 0\n", + "try: \n", + " df['snow.1h'] = df['snow.1h'].fillna(0)\n", + "except KeyError:\n", + " print(\"['snow.1h'], not in df\")\n", + "\n", + "# Display the df, now without NaN (at least for rain and snow)\n", + "display(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualisere endring av data\n", + "Har lagt inn en ny missigno visualisering, for å se at de manglende dataene \"forsvinner\" når vi kjører cellen over. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import missingno as msno\n", + "\n", + "# Visualize the same data again, but now there should be no missing values (at least for rain and snow)\n", + "msno.matrix(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualisere data i en graf\n", + "Ved hjelp av Matplotlib har vi visualiert ønsket data, og ved hjelp av subplot, en modul i matplotlib, kan vi plotte flere verdier i samme graf, og få \"to y-akse\" på samme x-akse. \n", + "\n", + "Temperatur og nedbør får plass i samme graf, hvor man leser temperatur verdiene på venstre side, og nedbørsverdiene på høyre side.\n", + "\n", + "I grafen under, men på samme x-akse, finner vi informasjon om vind, både vindhastighet og vindkast." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.dates as mdates\n", + "import os\n", + "\n", + "# Where the figure should be saved when exported\n", + "output_folder = \"../data/output_fig\"\n", + "\n", + "# Creates the folder if it does not exist\n", + "os.makedirs(output_folder, exist_ok=True)\n", + "\n", + "# x_axis set to the index, which means the datetime\n", + "x_axis = df.index\n", + "\n", + "# Gets the values\n", + "rain = df['rain.1h']\n", + "temp = df['main.temp']\n", + "snow = df['snow.1h']\n", + "wind_gust = df['wind.gust']\n", + "wind_speed = df['wind.speed']\n", + "\n", + "# Two vertically stacked axis, (2 rows, 1 column), width and height of the figure, and the axis share the same x_axis\n", + "fig, (ax1, ax3) = plt.subplots(2, 1,figsize=(15, 8), sharex=True)\n", + "\n", + "\n", + "# Set the title for the diagram, above the first axis, with city_name and start to end date\n", + "ax1.set_title(f'Weather data for {city_name} ({start_date}) to ({end_date}) ')\n", + "\n", + "# Plot temperature on the primary y-axis\n", + "ax1.plot(x_axis, temp, color='tab:red', label='Temperature (°C)')\n", + "\n", + "# Design the y-axis for temperatur\n", + "ax1.set_ylabel('Temperature (°C)', color='tab:red')\n", + "ax1.tick_params(axis='y', labelcolor='tab:red')\n", + "\n", + "# Plot Precipitation as bars on the secondary y-axis\n", + "ax2 = ax1.twinx()\n", + "\n", + "# Add rain\n", + "ax2.bar(x_axis, rain, color='tab:blue', alpha=0.5, width=0.02, label='Rain (mm)')\n", + "\n", + "# Add snow\n", + "ax2.bar(x_axis, snow, color='tab:grey', alpha=0.5, width=0.02, label='Snow (mm)')\n", + "\n", + "# Design the y-axis for precipitation\n", + "ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n", + "ax2.tick_params(axis='y', labelcolor='tab:blue')\n", + "\n", + "\n", + "# Customize the x-axis to show a tick every 12 hours\n", + 
"ax1.xaxis.set_major_locator(mdates.HourLocator(interval=12)) # Tick marks every 12 hours\n", + "ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H')) # Format as \"Day Month Hour\"\n", + "\n", + "# Add label-description for both axis\n", + "ax1.legend(loc='upper left')\n", + "ax2.legend(loc='upper right')\n", + "\n", + "# Add grid, but only vertically\n", + "ax1.grid(axis = 'x')\n", + "\n", + "\n", + "# Plot the wind at the second x-axis (the axis below)\n", + "ax3.plot(x_axis, wind_gust, color='tab:purple', label='Wind_gust')\n", + "ax3.plot(x_axis, wind_speed, color='tab:purple', linestyle='dashed', label='Wind_speed')\n", + "ax3.set_ylabel('Wind (m/s)')\n", + "\n", + "# Add x_label visible for both x-axis\n", + "ax3.set_xlabel('Datetime')\n", + "\n", + "# Add label-description\n", + "ax3.legend(loc='upper right')\n", + "\n", + "# Customize the x-axis to show a tick every 12 hours\n", + "ax3.xaxis.set_major_locator(mdates.HourLocator(interval=12)) # Tick marks every 12 hours\n", + "ax3.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H')) # Format as \"Day Month Hour\"\n", + "\n", + "# Adjust layout\n", + "plt.tight_layout()\n", + "\n", + "# Save the plot to the data/output_fig folder\n", + "plot_path = os.path.join(output_folder, f\"weather_data_plot{city_name}.png\")\n", + "plt.savefig(plot_path) # Save the plot as a PNG file\n", + "\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}