Skip to content

Commit

Permalink
Merge pull request #11 from torave/tora
Browse files Browse the repository at this point in the history
Tora - universal functions
  • Loading branch information
torave authored and GitHub Enterprise committed Apr 8, 2025
2 parents 8da093e + 04da5d8 commit f47456f
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 94 deletions.
66 changes: 19 additions & 47 deletions notebooks/notebook_one_day_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -164,26 +164,10 @@
"# Reads from file using pandas\n",
"weather_data = pd.read_json(f'../data/output_stedsnavn/data_{filename}.json')\n",
"\n",
"# Checks if 'list' in weather, then proceed because it is the right data\n",
"if 'list' in weather_data:\n",
" # Normalize the json for better readability\n",
" df = pd.json_normalize(weather_data['list'])\n",
"from my_package.util import extract_city_df\n",
"\n",
" # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
" df = df.drop_duplicates(subset=['dt'])\n",
"\n",
" # The weather column dosnt have any releated information, therefor we delete it\n",
" df = df.drop(columns=\"weather\")\n",
"\n",
" # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
" df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
" df.set_index('dt', inplace=True)\n",
"\n",
" # Ensure the DataFrame is displayed correctly \n",
" display(df)\n",
" \n",
"else:\n",
" print(\"The 'list' key is not present in the JSON file.\")"
"df = extract_city_df(weather_data)\n",
"display(df)"
]
},
{
Expand Down Expand Up @@ -249,6 +233,9 @@
"# Add title for the plot\n",
"plt.title(f'Temperatur {city_name}, ({date})')\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Show grid\n",
"plt.grid()\n",
"\n",
Expand Down Expand Up @@ -277,6 +264,9 @@
"import matplotlib.dates as mdates\n",
"import numpy as np\n",
"\n",
"from my_package.util import ensure_rain_column\n",
"from my_package.util import ensure_snow_column\n",
"\n",
"x_axis = df.index\n",
"\n",
"# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
Expand All @@ -285,17 +275,15 @@
"\n",
"# If no rain, make the rain column and fill it with NaN\n",
"except KeyError:\n",
" print(\"'Rain' is not present in the JSON file.\")\n",
" df['rain.1h'] = np.nan\n",
" df = ensure_rain_column(df)\n",
"\n",
"# Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
"try:\n",
" snow = df['snow.1h']\n",
"\n",
"# If no snow, make the snow column and fill it with NaN\n",
"except KeyError:\n",
" print(\"'Snow' is not present in the JSON file.\")\n",
" df['snow.1h'] = np.nan\n",
" df = ensure_snow_column(df)\n",
"\n",
"# Choose the width and height of the plot\n",
"plt.figure(figsize=(15, 6))\n",
Expand Down Expand Up @@ -387,17 +375,12 @@
"metadata": {},
"outputs": [],
"source": [
"# If rain is stored, fill the NaN with 0\n",
"try: \n",
" df['rain.1h'] = df['rain.1h'].fillna(0)\n",
"except KeyError:\n",
" print([\"'rain.1h', not in df\"])\n",
"from my_package.util import fill_rain_column\n",
"from my_package.util import fill_snow_column\n",
"\n",
"# If snow is stored, fill the NaN with 0\n",
"try: \n",
" df['snow.1h'] = df['snow.1h'].fillna(0)\n",
"except KeyError:\n",
" print(\"['snow.1h'], not in df\")\n",
"df = fill_rain_column(df)\n",
"\n",
"df = fill_snow_column(df)\n",
"\n",
"# Drops all the columns, if it has 'NaN' value.\n",
"df = df.dropna(axis='columns', how='all')\n",
Expand Down Expand Up @@ -518,6 +501,9 @@
"ax1.axhline(y=temp_mean, color='tab:red', linestyle='dashed', label='Mean temperature (°C)')\n",
"ax1.tick_params(axis='y', labelcolor='tab:red')\n",
"\n",
"# Add marker at 0 temperature\n",
"ax1.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Plot Precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"\n",
Expand Down Expand Up @@ -571,13 +557,6 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -688,13 +667,6 @@
"# Display the predicted temperatures\n",
"print(f'predicted temperatures: {predicted_temperatures}')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
75 changes: 28 additions & 47 deletions notebooks/notebook_one_week_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@
"### Rensking av riktig data\n",
"Vi går inn i 'list' for å finne den relevante informasjonen, og ikke bare meta-informasjon.\n",
"\n",
"Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid."
"Sørger for å fjerne duplikater, og andre irelevante kolonner. Samt setter index kolonnen til tid.\n",
"\n",
"Sjekker om kolonnene ['rain.1h'] eller ['snow.1h'] er tilstede, hvis ikke blir de lagd og fylt med NaN. Vi trenger denne kolonnen, selvom den er tom, for senere å plotte grafen og unngå feilmeldinger. Dette fordi vi senere sjekker manglende verdier som NaN. "
]
},
{
Expand All @@ -171,43 +173,29 @@
"source": [
"import numpy as np\n",
"\n",
"# Goes into the 'list' to get the needed and relevant information\n",
"if 'list' in data:\n",
" # Normalize the json, for better readability\n",
" df = pd.json_normalize(data['list'])\n",
"\n",
" # Delete duplicates based on the dt row, all the other values can appear more than once, but the date should only appear once\n",
" df = df.drop_duplicates(subset=['dt'])\n",
"from my_package.util import extract_city_df\n",
"from my_package.util import ensure_rain_column\n",
"from my_package.util import ensure_snow_column\n",
"\n",
" # The weather column does not have any releated information, therefor we delete it\n",
" df = df.drop(columns=\"weather\")\n",
"df = extract_city_df(data)\n",
"\n",
" # Convert 'dt' column from Unix timestamp to datetime and set it as the index\n",
" df['dt'] = pd.to_datetime(df['dt'], unit='s')\n",
" df.set_index('dt', inplace=True)\n",
"# Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
"try:\n",
" rain = df['rain.1h']\n",
"\n",
" # Checks if the rain is a value, it will not be if it is no rain and then cause a KeyError\n",
" try:\n",
" rain = df['rain.1h']\n",
"\n",
" # If no rain, make the rain column and fill it with NaN\n",
" except KeyError:\n",
" print(\"'Rain' is not present in the JSON file.\")\n",
" df['rain.1h'] = np.nan\n",
"# If no rain, make the rain column and fill it with NaN\n",
"except KeyError:\n",
" df = ensure_rain_column(df)\n",
"\n",
" # Checks if the snow is a value, it will not be if it is no rain and then cause a KeyError\n",
" try:\n",
" snow = df['snow.1h']\n",
"# Checks if the snow is a value, it will not be if it is no snow and then cause a KeyError\n",
"try:\n",
" snow = df['snow.1h']\n",
"\n",
" # If no snow, make the snow column and fill it with NaN\n",
" except KeyError:\n",
" print(\"'Snow' is not present in the JSON file.\")\n",
" df['snow.1h'] = np.nan\n",
"# If no snow, make the snow column and fill it with NaN\n",
"except KeyError:\n",
" df = ensure_snow_column(df)\n",
"\n",
" # Display the datafram, with the changes\n",
" display(df)\n",
"else:\n",
" print(\"The 'list' key is not present in the JSON file.\")"
"display(df) "
]
},
{
Expand Down Expand Up @@ -274,17 +262,11 @@
"metadata": {},
"outputs": [],
"source": [
"# If rain is stored, fill the NaN with 0\n",
"try: \n",
" df['rain.1h'] = df['rain.1h'].fillna(0)\n",
"except KeyError:\n",
" print([\"'rain.1h', not in df\"])\n",
"from my_package.util import fill_rain_column\n",
"from my_package.util import fill_snow_column\n",
"\n",
"# If snow is stored, fill the NaN with 0\n",
"try: \n",
" df['snow.1h'] = df['snow.1h'].fillna(0)\n",
"except KeyError:\n",
" print(\"['snow.1h'], not in df\")\n",
"df = fill_rain_column(df)\n",
"df = fill_snow_column(df)\n",
"\n",
"# If wind_gust is stored, fill the NaN with 0\n",
"try: \n",
Expand Down Expand Up @@ -407,13 +389,13 @@
"# Plot Precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"\n",
"bin_count = len(x_axis)\n",
"\n",
"# Add rain\n",
"# ax2.bar(x_axis, rain, color='tab:blue', alpha=0.5, width=0.02, label='Rain (mm)')\n",
"ax2.hist(x_axis, bins=len(x_axis), weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
"ax2.hist(x_axis, bins=bin_count, weights=rain, color='tab:blue', alpha=0.5, label= 'Rain (mm)', bottom=snow)\n",
"\n",
"# Add snow\n",
"# ax2.bar(x_axis, snow, color='tab:grey', alpha=0.5, width=0.02, label='Snow (mm)')\n",
"ax2.hist(x_axis, bins=len(x_axis), weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
"ax2.hist(x_axis, bins=bin_count, weights=snow, color='tab:gray', alpha=0.5, label= 'Snow (mm)')\n",
"\n",
"# Design the y-axis for precipiation\n",
"ax2.set_ylabel(\"Precipitation (mm)\", color='tab:blue')\n",
Expand Down Expand Up @@ -596,7 +578,6 @@
"plot_path = os.path.join(output_folder, f\"weather_data_plot{city_name}.png\")\n",
"plt.savefig(plot_path) # Save the plot as a PNG file\n",
"\n",
"\n",
"# Show the plot\n",
"plt.show()"
]
Expand Down
10 changes: 10 additions & 0 deletions notebooks/notebook_statistic_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@
"plt.xlabel(\"Date\")\n",
"plt.ylabel(\"Temperature (°C)\")\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Customize the x-axis to show ticks and labels only at the start of each month\n",
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
"# Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
Expand Down Expand Up @@ -262,6 +265,9 @@
"ax1.set_ylabel('Temperature (°C)', color='tab:red')\n",
"ax1.tick_params(axis='y', labelcolor='tab:red')\n",
"\n",
"# Add marker at 0 temperature\n",
"ax1.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Plot precipitation as bars on the secondary y-axis\n",
"ax2 = ax1.twinx()\n",
"ax2.bar(x_axis, precipitation, color='tab:blue', alpha=0.5, width=1, label='Precipitation (mm)')\n",
Expand Down Expand Up @@ -349,6 +355,8 @@
"plt.plot(x_axis, temp_record_max, color='tab:red', label = 'Max temperatur')\n",
"plt.plot(x_axis, temp_record_min, color='tab:blue', label = 'Min temperatur')\n",
"\n",
"# Add marker at 0 temperature\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Customize the x-axis to show ticks and labels only at the start of each month\n",
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
Expand Down Expand Up @@ -482,6 +490,8 @@
"plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) \n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b')) # Format ticks to show abbreviated month names (e.g., Jan, Feb)\n",
"\n",
"plt.axhline(y=0, color='black', linewidth=1.5)\n",
"\n",
"# Add labels, title, and legend\n",
"plt.xlabel('Month-Day')\n",
"plt.ylabel('Temperature (°C)')\n",
Expand Down

0 comments on commit f47456f

Please sign in to comment.