Skip to content

Tora - universal functions #11

Merged
merged 3 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 19 additions & 47 deletions notebooks/notebook_one_day_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -164,26 +164,10 @@
"# Reads from file using pandas\n",
"weather_data = pd.read_json(f'../data/output_stedsnavn/data_{filename}.json')\n",
"\n",
"# Checks if 'list' in weather, then proceed because it is the right data\n",
"if 'list' in weather_data:\n",
" # Normalize the json for better readability\n",
" df = pd.json_normalize(weather_data['list'])\n",
"from my_package.util import extract_city_df\n",
"\n",
" # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
" df = df.drop_duplicates(subset=['dt'])\n",
"\n",
" # The weather column dosnt have any releated information, therefor we delete it\n",
" df = df.drop(columns=\"weather\")\n",
"\n",
" # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
" df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
" df.set_index('dt', inplace=True)\n",
"\n",
" # Ensure the DataFrame is displayed correctly \n",
" display(df)\n",
" \n",
"else:\n",
" print(\"The 'list' key is not present in the JSON file.\")"
"df = extract_city_df(weather_data)\n",
"display(df)"
]
},
{
Expand Down Expand Up @@ -249,6 +233,9 @@
"# Add title for the plot\n",
"plt.title(f'Temperatur {city_name}, ({date})')\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Show grid\n",
"plt.grid()\n",
"\n",
Expand Down Expand Up @@ -277,6 +264,9 @@
"import matplotlib.dates as mdates\n",
"import numpy as np\n",
"\n",
"from my_package.util import ensure_rain_column\n",
"from my_package.util import ensure_snow_column\n",
"\n",
"x_axis = df.index\n",
"\n",
"# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
Expand All @@ -285,17 +275,15 @@
"\n",
"# If no rain, make the rain column and fill it with NaN\n",
"except KeyError:\n",
" print(\"'Rain' is not present in the JSON file.\")\n",
" df['rain.1h'] = np.nan\n",
" df = ensure_rain_column(df)\n",
"\n",
"# Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
"try:\n",
" snow = df['snow.1h']\n",
"\n",
"# If no snow, make the snow column and fill it with NaN\n",
"except KeyError:\n",
" print(\"'Snow' is not present in the JSON file.\")\n",
" df['snow.1h'] = np.nan\n",
" df = ensure_snow_column(df)\n",
"\n",
"# Choose the width and height of the plot\n",
"plt.figure(figsize=(15, 6))\n",
Expand Down Expand Up @@ -387,17 +375,12 @@
"metadata": {},
"outputs": [],
"source": [
"# If rain is stored, fill the NaN with 0\n",
"try: \n",
" df['rain.1h'] = df['rain.1h'].fillna(0)\n",
"except KeyError:\n",
" print([\"'rain.1h', not in df\"])\n",
"from my_package.util import fill_rain_column\n",
"from my_package.util import fill_snow_column\n",
"\n",
"# If snow is stored, fill the NaN with 0\n",
"try: \n",
" df['snow.1h'] = df['snow.1h'].fillna(0)\n",
"except KeyError:\n",
" print(\"['snow.1h'], not in df\")\n",
"df = fill_rain_column(df)\n",
"\n",
"df = fill_snow_column(df)\n",
"\n",
"# Drops all the columns, if it has 'NaN' value.\n",
"df = df.dropna(axis='columns', how='all')\n",
Expand Down Expand Up @@ -518,6 +501,9 @@
"ax1.axhline(y=temp_mean, color='tab:red', linestyle='dashed', label='Mean temperature (°C)')\n",
"ax1.tick_params(axis='y', labelcolor='tab:red')\n",
"\n",
"# Add marker at 0 temperature\n",
"ax1.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Plot Precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"\n",
Expand Down Expand Up @@ -571,13 +557,6 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -688,13 +667,6 @@
"# Display the predicted temperatures\n",
"print(f'predicted temperatures: {predicted_temperatures}')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
75 changes: 28 additions & 47 deletions notebooks/notebook_one_week_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@
"### Rensking av riktig data\n",
"Vi går inn i 'list' for å finne den relevante informasjonen, og ikke bare meta-informasjon.\n",
"\n",
"Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid."
"Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid.\n",
"\n",
"Sjekker om kolonnene ['rain.1h'] eller ['snow.1h'] er tilstede, hvis ikke blir de lagd og fylt med NaN. Vi trenger denne kolonnen, selvom den er tom, for senere å plotte grafen og unngå feilmeldinger. Dette fordi vi senere sjekker manglende verdier som NaN. "
]
},
{
Expand All @@ -171,43 +173,29 @@
"source": [
"import numpy as np\n",
"\n",
"# Goes into the 'list' to get the needed and relevant information\n",
"if 'list' in data:\n",
" # Normalize the json, for better readability\n",
" df = pd.json_normalize(data['list'])\n",
"\n",
" # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
" df = df.drop_duplicates(subset=['dt'])\n",
"from my_package.util import extract_city_df\n",
"from my_package.util import ensure_rain_column\n",
"from my_package.util import ensure_snow_column\n",
"\n",
" # The weather column does not have any releated information, therefor we delete it\n",
" df = df.drop(columns=\"weather\")\n",
"df = extract_city_df(data)\n",
"\n",
" # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
" df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
" df.set_index('dt', inplace=True)\n",
"# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
"try:\n",
" rain = df['rain.1h']\n",
"\n",
" # Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
" try:\n",
" rain = df['rain.1h']\n",
"\n",
" # If no rain, make the rain column and fill it with NaN\n",
" except KeyError:\n",
" print(\"'Rain' is not present in the JSON file.\")\n",
" df['rain.1h'] = np.nan\n",
"# If no rain, make the rain column and fill it with NaN\n",
"except KeyError:\n",
" df = ensure_rain_column(df)\n",
"\n",
" # Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
" try:\n",
" snow = df['snow.1h']\n",
"# Checks if the snow is a value, it will not be if it is no snow and then cause a KeyError\n",
"try:\n",
" snow = df['snow.1h']\n",
"\n",
" # If no snow, make the snow column and fill it with NaN\n",
" except KeyError:\n",
" print(\"'Snow' is not present in the JSON file.\")\n",
" df['snow.1h'] = np.nan\n",
"# If no snow, make the snow column and fill it with NaN\n",
"except KeyError:\n",
" df = ensure_snow_column(df)\n",
"\n",
" # Display the datafram, with the changes\n",
" display(df)\n",
"else:\n",
" print(\"The 'list' key is not present in the JSON file.\")"
"display(df) "
]
},
{
Expand Down Expand Up @@ -274,17 +262,11 @@
"metadata": {},
"outputs": [],
"source": [
"# If rain is stored, fill the NaN with 0\n",
"try: \n",
" df['rain.1h'] = df['rain.1h'].fillna(0)\n",
"except KeyError:\n",
" print([\"'rain.1h', not in df\"])\n",
"from my_package.util import fill_rain_column\n",
"from my_package.util import fill_snow_column\n",
"\n",
"# If snow is stored, fill the NaN with 0\n",
"try: \n",
" df['snow.1h'] = df['snow.1h'].fillna(0)\n",
"except KeyError:\n",
" print(\"['snow.1h'], not in df\")\n",
"df = fill_rain_column(df)\n",
"df = fill_snow_column(df)\n",
"\n",
"# If wind_gust is stored, fill the NaN with 0\n",
"try: \n",
Expand Down Expand Up @@ -407,13 +389,13 @@
"# Plot Precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"\n",
"bin_count = len(x_axis)\n",
"\n",
"# Add rain\n",
"# ax2.bar(x_axis, rain, color='tab:blue', alpha=0.5, width=0.02, label='Rain (mm)')\n",
"ax2.hist(x_axis, bins=len(x_axis), weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
"ax2.hist(x_axis, bins=bin_count, weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
"\n",
"# Add snow\n",
"# ax2.bar(x_axis, snow, color='tab:grey', alpha=0.5, width=0.02, label='Snow (mm)')\n",
"ax2.hist(x_axis, bins=len(x_axis), weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
"ax2.hist(x_axis, bins=bin_count, weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
"\n",
"# Design the y-axis for precipiation\n",
"ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n",
Expand Down Expand Up @@ -596,7 +578,6 @@
"plot_path = os.path.join(output_folder, f\"weather_data_plot{city_name}.png\")\n",
"plt.savefig(plot_path) # Save the plot as a PNG file\n",
"\n",
"\n",
"# Show the plot\n",
"plt.show()"
]
Expand Down
10 changes: 10 additions & 0 deletions notebooks/notebook_statistic_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@
"plt.xlabel(\"Date\")\n",
"plt.ylabel(\"Temperature (°C)\")\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Customize the x-axis to show ticks and labels only at the start of each month\n",
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
"# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
Expand Down Expand Up @@ -262,6 +265,9 @@
"ax1.set_ylabel('Temperature (°C)', color='tab:red')\n",
"ax1.tick_params(axis='y', labelcolor='tab:red')\n",
"\n",
"# Add marker at 0 temperature\n",
"ax1.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Plot precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"ax2.bar(x_axis, precipitation, color='tab:blue', alpha=0.5, width=1, label='Precipitation (mm)')\n",
Expand Down Expand Up @@ -349,6 +355,8 @@
"plt.plot(x_axis, temp_record_max, color='tab:red', label = 'Max temperatur')\n",
"plt.plot(x_axis, temp_record_min, color='tab:blue', label = 'Min temperatur')\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Customize the x-axis to show ticks and labels only at the start of each month\n",
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
Expand Down Expand Up @@ -482,6 +490,8 @@
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) # Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
"\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Add labels, title, and legend\n",
"plt.xlabel('Month-Day')\n",
"plt.ylabel('Temperature (°C)')\n",
Expand Down