# Reconstructed from a whitespace-collapsed git patch against
# src/my_package/util.py:
#   commit  83db88fab912fdc93463271a7327a25adca0c924
#   author  toravest, Mon, 7 Apr 2025 11:29:12 +0200
#   subject add universal functions, to import to notebook, ex. ensure_rain/snow
#
# NOTE(review): `replace_nordic` appears in the patch only as unchanged diff
# context, with two of its lines elided between hunks, so it is not
# reproduced here -- keep the existing definition from util.py as-is.
import numpy as np
import pandas as pd


def kelvin_to_celsius(temp_in_kelvin):
    """Convert a temperature from kelvin to degrees Celsius.

    Only a float subtraction is performed, so any operand that supports it
    (a plain number, a NumPy array, a pandas Series) can be passed.
    """
    return temp_in_kelvin - 273.15


def _ensure_column(df, column, label):
    """Add *column* to *df*, filled with NaN, when it is missing."""
    if column not in df.columns:
        print(f"'{label}' is not present in the JSON file.")
        df[column] = np.nan
    return df


def _fill_column(df, column):
    """Replace NaN in *column* of *df* with 0; report if the column is missing."""
    if column in df.columns:
        df[column] = df[column].fillna(0)
    else:
        # Print a plain string: the previous code passed a one-element list
        # to print(), which showed the list repr instead of the message.
        print(f"'{column}' not in df")
    return df


def ensure_rain_column(df):
    """Make sure *df* has a 'rain.1h' column.

    If the column is missing, a notice is printed and the column is created
    filled with NaN. *df* is modified in place and also returned.
    """
    return _ensure_column(df, 'rain.1h', 'Rain')


def ensure_snow_column(df):
    """Make sure *df* has a 'snow.1h' column.

    If the column is missing, a notice is printed and the column is created
    filled with NaN. *df* is modified in place and also returned.
    """
    return _ensure_column(df, 'snow.1h', 'Snow')


def fill_rain_column(df):
    """Replace NaN values in the 'rain.1h' column with 0.

    If the column does not exist, a notice is printed and *df* is returned
    unchanged. *df* is modified in place and also returned.
    """
    return _fill_column(df, 'rain.1h')


def fill_snow_column(df):
    """Replace NaN values in the 'snow.1h' column with 0.

    If the column does not exist, a notice is printed and *df* is returned
    unchanged. *df* is modified in place and also returned.
    """
    return _fill_column(df, 'snow.1h')


def extract_city_df(weather_data):
    """Flatten the 'list' entries of *weather_data* into a DataFrame.

    Nested dictionaries are flattened into dotted column names, rows that
    repeat a 'dt' value are dropped (the first occurrence is kept), the
    'weather' column is removed when present, and 'dt' is converted from a
    Unix timestamp in seconds to datetime and used as the index.

    Returns:
        The resulting DataFrame; an empty DataFrame when 'list' holds no
        data; or None when *weather_data* has no 'list' key.
    """
    if 'list' not in weather_data:
        return None

    df = pd.json_normalize(weather_data['list'])
    if df.empty:
        # Nothing to index: without rows there is no 'dt' column to work on.
        return df

    # Every other value may legitimately repeat; a timestamp may not.
    df = df.drop_duplicates(subset=['dt'])

    # Not every payload carries a 'weather' field, so a missing column is
    # not treated as an error.
    df = df.drop(columns='weather', errors='ignore')

    df['dt'] = pd.to_datetime(df['dt'], unit='s')
    return df.set_index('dt')