Skip to content

Commit

Permalink
add universal functions, to import to notebook, ex. ensure_rain/snow
Browse files Browse the repository at this point in the history
  • Loading branch information
torave committed Apr 7, 2025
1 parent bdae9fa commit 83db88f
Showing 1 changed file with 72 additions and 2 deletions.
74 changes: 72 additions & 2 deletions src/my_package/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import numpy as np
import pandas as pd

# Function to replace the Nordic letters 'æøå'
def replace_nordic(city_name):
for letter in city_name:
if letter in 'æøå':
Expand All @@ -6,7 +10,73 @@ def replace_nordic(city_name):
city_name = city_name.replace('å', 'aa')
return city_name


# Function to convert from kelvin to celsius temp
def kelvin_to_celsius(temp_in_kelvin):
    """Convert a temperature from kelvin to degrees Celsius.

    Args:
        temp_in_kelvin: Temperature in kelvin. Anything that supports
            subtracting a float works (a plain number, or an array-like
            such as a NumPy array or pandas Series).

    Returns:
        The input with 273.15 subtracted, i.e. the temperature in Celsius.
    """
    # 0 degrees Celsius corresponds to 273.15 K.
    return temp_in_kelvin - 273.15

# Function to check whether there is a ['rain.1h'] column in the dataset; if not, make one
def ensure_rain_column(df):
    """Make sure ``df`` has a 'rain.1h' column.

    If the column is missing, a message is printed and the column is added
    in place, filled with NaN. An existing column is left untouched.

    Args:
        df: DataFrame to check. It is modified in place when the column
            has to be added.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Explicit membership test instead of a throwaway lookup inside try/except.
    if 'rain.1h' not in df.columns:
        print("'Rain' is not present in the JSON file.")
        df['rain.1h'] = np.nan

    return df

# Function to check whether there is a ['snow.1h'] column in the dataset; if not, make one
def ensure_snow_column(df):
    """Make sure ``df`` has a 'snow.1h' column.

    If the column is missing, a message is printed and the column is added
    in place, filled with NaN. An existing column is left untouched.

    Args:
        df: DataFrame to check. It is modified in place when the column
            has to be added.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Explicit membership test instead of a throwaway lookup inside try/except.
    if 'snow.1h' not in df.columns:
        print("'Snow' is not present in the JSON file.")
        df['snow.1h'] = np.nan

    return df

# Function to fill NaN values in the ['rain.1h'] column with 0
def fill_rain_column(df):
    """Replace NaN values in the 'rain.1h' column with 0.

    If the column does not exist, a message is printed and the DataFrame is
    returned unchanged.

    Args:
        df: DataFrame to update. The 'rain.1h' column is reassigned in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'rain.1h' in df.columns:
        # Replace the NaN with 0, using the pandas fillna function
        df['rain.1h'] = df['rain.1h'].fillna(0)
    else:
        # Print a plain string; the message used to be wrapped in a list,
        # which printed it with brackets and quotes.
        print("'rain.1h' is not present in the DataFrame.")

    return df

# Function to fill NaN values in the ['snow.1h'] column with 0
def fill_snow_column(df):
    """Replace NaN values in the 'snow.1h' column with 0.

    If the column does not exist, a message is printed and the DataFrame is
    returned unchanged.

    Args:
        df: DataFrame to update. The 'snow.1h' column is reassigned in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'snow.1h' in df.columns:
        # Replace the NaN with 0, using the pandas fillna function
        df['snow.1h'] = df['snow.1h'].fillna(0)
    else:
        # Print a plain string; the message used to be wrapped in a list,
        # which printed it with brackets and quotes.
        print("'snow.1h' is not present in the DataFrame.")

    return df

# Function to 'normalize' the dataset, with index-changing and dropping meta-data
def extract_city_df(weather_data):
    """Flatten the 'list' entries of a weather payload into a DataFrame.

    The records under ``weather_data['list']`` are flattened with
    ``pd.json_normalize``, de-duplicated on 'dt', stripped of the 'weather'
    column, and indexed by 'dt' converted from Unix seconds to datetime.

    Args:
        weather_data: Mapping that is expected to hold the records under
            the 'list' key (presumably a decoded JSON response; confirm
            against the caller).

    Returns:
        A DataFrame indexed by 'dt', or an empty DataFrame when 'list'
        holds no data, or None when ``weather_data`` is None or has no
        'list' key.
    """
    # Guard clause: nothing to extract without a 'list' entry.
    if weather_data is None or 'list' not in weather_data:
        return None

    # Normalize the json for better readability
    df = pd.json_normalize(weather_data['list'])

    # An empty 'list' gives a frame without a 'dt' column; return it as-is
    # instead of failing on the column lookups below.
    if df.empty:
        return df

    # Delete duplicates based on the dt column: other values may repeat,
    # but each timestamp should appear only once.
    df = df.drop_duplicates(subset=['dt'])

    # The weather column doesn't hold any related information, so drop it.
    # errors='ignore' tolerates payloads that have no such column.
    df = df.drop(columns='weather', errors='ignore')

    # Convert 'dt' from Unix timestamp to datetime and set it as the index
    df['dt'] = pd.to_datetime(df['dt'], unit='s')
    return df.set_index('dt')

0 comments on commit 83db88f

Please sign in to comment.