add function documentation to data.py
toravest committed May 23, 2025
1 parent c15c69c commit 9bc0dc1
Showing 1 changed file with 58 additions and 9 deletions.
67 changes: 58 additions & 9 deletions src/my_package/data.py
@@ -4,7 +4,6 @@
from dotenv import load_dotenv
import json
import pandas as pd
import sys

load_dotenv()

@@ -18,7 +17,14 @@

# Gets the current data from the API - openweathermap.org
def fetch_current_data(city_name):

'''
Takes in city_name and fetches the current weather data from the
API - openweathermap.org. The URL is built with the user's API_KEY and the given city name
when the function is called. An if-statement checks that the status code returned by the
request is 200, which means 'OK'. If so, it prints that the fetch is OK, parses the response
as JSON, and returns the data together with a standard folder for the data to be written to.
If the status code is not 200, it returns no data and the status code.
'''

# f-string url, to add the "custom" variables to the API-request
url = f"https://api.openweathermap.org/data/2.5/weather?q={city_name},NO&units=metric&appid={API_KEY}"
@@ -35,7 +41,6 @@ def fetch_current_data(city_name):

print("Data fetch: ok")
return data, folder


else:
# If html status code != 200, print the status code
@@ -44,7 +49,14 @@ def fetch_current_data(city_name):
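# Illustrative usage sketch (a minimal example, assuming a valid API_KEY is loaded from .env
# and network access is available; "Oslo" and the filename are made-up values):
#   data, folder = fetch_current_data("Oslo")
#   if data:
#       write_data(data, folder, "oslo_current")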

# Gets data for the wanted time period from the API - openweathermap.org
def fetch_time_data(start_date, end_date, city_name):

'''
Takes in city_name and the start/end dates, and fetches data from the
API - openweathermap.org for the wanted place during the wanted time period. The URL is an
f-string that takes in the custom variables city_name, start_date, end_date and the user's
API_KEY. An if-statement checks that the status code returned by the request is 200, which
means 'OK'. If so, it prints that the fetch is OK, parses the response as JSON, and returns
the data together with a standard folder for the data to be written to. If the status code
is not 200, it returns no data and the status code.
'''

# f-string url, to add the "custom" variables to the API-request
url = f"https://history.openweathermap.org/data/2.5/history/city?q={city_name},NO&units=metric&type=hour&start={start_date}&end={end_date}&appid={API_KEY}"
@@ -69,7 +81,12 @@ def fetch_time_data(start_date, end_date, city_name):
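# Illustrative usage sketch (assumes start_date/end_date are Unix timestamps, as the history
# endpoint's start/end parameters suggest, and that the API key has history access; the city
# and filename are made-up values):
#   hourly, folder = fetch_time_data(1716422400, 1716508800, "Trondheim")
#   if hourly:
#       write_data(hourly, folder, "trondheim_may")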

# Gets statistical data from the API - openweathermap.org
def fetch_stat_data(city_name):

'''
Takes in the city_name parameter and fetches statistical weather data from the
API - openweathermap.org. An if-statement checks that the status code returned by the request
is 200, which means 'OK'. If so, it prints that the fetch is OK, parses the response as JSON,
and returns the data together with a standard folder for the data to be written to.
If the status code is not 200, it returns no data and the status code.
'''

# f-string url, to add the "custom" variables to the API-request
url = f"https://history.openweathermap.org/data/2.5/aggregated/year?q={city_name},NO&appid={API_KEY}&units=metric"
@@ -94,14 +111,21 @@ def fetch_stat_data(city_name):
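# Illustrative usage sketch (assumes the API key has access to the aggregated statistics
# endpoint; "Bergen" and the filename are made-up values):
#   stats, folder = fetch_stat_data("Bergen")
#   if stats:
#       write_data(stats, folder, "bergen_stats")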

# Write data to json-file
def write_data(data, folder, filename):
'''
Takes in three parameters: data, the JSON data we want to write; folder, the place
we want the file to be; and filename, what the file should be named.
It then finds the way from the script to the root of the project and navigates to the wanted
output place, '../data/json/output_{notebook}'. If the folder does not exist, it is created.
The JSON data is then written to a json-file inside the right folder with the chosen filename.
'''
# Ensure the output folder exists inside the 'data' folder at the root of the project
script_dir = os.path.dirname(os.path.abspath(__file__)) # Get the directory of the script
project_root = os.path.abspath(os.path.join(script_dir, os.pardir, os.pardir)) # Navigate to the root of the project
data_dir = os.path.join(project_root, 'data', folder)
os.makedirs(data_dir, exist_ok=True) # Creates 'data/output_stedsdata' folder if it doesn't exist
os.makedirs(data_dir, exist_ok=True) # Creates 'data/output_{notebook}' folder if it doesn't exist

# Write the JSON data to a file inside the 'output_stedsdata' folder
file_path = os.path.join(data_dir, f'data_{filename}.json') # Creates 'data/output_stedsdata/data_{filename}.json'
# Write the JSON data to a file inside the 'output_{notebook}' folder
file_path = os.path.join(data_dir, f'data_{filename}.json') # Creates 'data/json/output_{notebook}/data_{filename}.json'

# Opens the file and writes the JSON data to it
with open(file_path, 'w') as json_file:
@@ -113,6 +137,16 @@ def write_data(data, folder, filename):
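# Illustrative usage sketch: with the made-up arguments below, and data being a JSON dict
# returned by one of the fetch functions, the file would be written to
# <project_root>/data/json/output_demo/data_current.json (the folder is created if missing):
#   write_data(data, 'json/output_demo', 'current')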

# Function to 'normalize' the dataset, with index-changing and dropping meta-data
def extract_city_df(weather_data):
'''
Takes in the parameter weather_data, which is the JSON data written to a file.
The function checks for the 'list' key in weather_data, because that is where the data is
stored; everything above it is just meta-data we don't need. The data is then stored as a
DataFrame and flattened with json_normalize, for better readability and a flat data structure.
drop_duplicates checks for duplicates in the 'dt' column, meaning the time:
all the other values may be the same, but each time should only appear once. The function
then converts the 'dt' Unix timestamps to a readable datetime format and makes the column
the index of the DataFrame. If 'list' is not in the data, the function returns None.
'''
if 'list' in weather_data:
# Normalize the json for better readability
df = pd.json_normalize(weather_data['list'])
@@ -133,6 +167,13 @@ def extract_city_df(weather_data):
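# Illustrative usage sketch (assumes 'hourly' is a response dict from fetch_time_data,
# i.e. JSON with a 'list' key holding the hourly records):
#   df = extract_city_df(hourly)
#   if df is not None:
#       print(df.head())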

# Function to 'normalize' the dataset for statistic-data, with index-changing and dropping meta-data
def extract_city_data_stat(data):
'''
Takes in the parameter data, which is the statistical JSON data written to a file.
The function checks for the 'result' key in data, because that is where the data is stored;
everything above it is just meta-data we don't need. The data is then stored as a DataFrame
and flattened with json_normalize, for better readability and a flat data structure.
If 'result' is not in the data, the function returns None.
'''
# Checks if the 'result' column is in the data
if 'result' in data:
# Normalize the json and store it as a dataframe for better readability
@@ -142,4 +183,12 @@ def extract_city_data_stat(data):
return df
else:
print("'result' not in data")
return None
return None

# Prints the documentation for the functions, i.e. the docstrings written inside '''triple quotes'''
print(fetch_current_data.__doc__)
print(fetch_time_data.__doc__)
print(fetch_stat_data.__doc__)
print(write_data.__doc__)
print(extract_city_df.__doc__)
print(extract_city_data_stat.__doc__)
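
# Minimal end-to-end sketch of how the helpers above can be combined. It assumes a valid
# API_KEY in .env and network access, that the fetch functions return (data, folder) on
# success as described in their docstrings, and uses made-up city/filename values; the
# block only runs when the module is executed directly.
if __name__ == "__main__":
    result = fetch_stat_data("Oslo")
    if result and result[0]:
        stats, folder = result
        write_data(stats, folder, "oslo_stats")
        stat_df = extract_city_data_stat(stats)
        if stat_df is not None:
            print(stat_df.head())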
