From 9bc0dc14aa214d2251395356a9f353ffad0e8107 Mon Sep 17 00:00:00 2001 From: toravest Date: Fri, 23 May 2025 08:22:53 +0200 Subject: [PATCH] add function documentation to data.py --- src/my_package/data.py | 67 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/src/my_package/data.py b/src/my_package/data.py index 74a91e2..b42fb76 100644 --- a/src/my_package/data.py +++ b/src/my_package/data.py @@ -4,7 +4,6 @@ from dotenv import load_dotenv import json import pandas as pd -import sys load_dotenv() @@ -18,7 +17,14 @@ # Gets the current data from the API - openweathermap.org def fetch_current_data(city_name): - + ''' + This function takes in the city_name and fetches the current data from the + API - openweathermap.org. The url is built from the user's API_KEY and the city name + passed in when the function is called. An if-statement makes sure the status code + from the request is 200, which means 'ok'. If so, it prints that the fetch is ok, and makes + the data json compatible. It then returns the data and a standard folder for the data to be written to. + If the status code is not 200, it returns no data and the status code. + ''' # f-string url, to add the "custom" variables to the API-request url = f"https://api.openweathermap.org/data/2.5/weather?q={city_name},NO&units=metric&appid={API_KEY}" @@ -35,7 +41,6 @@ def fetch_current_data(city_name): print("Data fetch: ok") return data, folder - else: # If html status code != 200, print the status code @@ -44,7 +49,14 @@ def fetch_current_data(city_name): # Gets the wanted timeperiod data from the API - openweathermap.org def fetch_time_data(start_date, end_date, city_name): - + ''' + This function takes in the city_name and the start/end date, to fetch data from the + API - openweathermap.org for the wanted place during the wanted time. The url is an f-string + that takes in the custom variables city_name, start_date, end_date and the user's API_KEY. 
+ An if-statement makes sure the status code from the request is 200, which means 'ok'. + If so, it prints that the fetch is ok, and makes the data json compatible. It then returns a standard + folder for the data to be written to. If the status code is not 200, it returns no data and the status code. + ''' # f-string url, to add the "custom" variables to the API-request url = f"https://history.openweathermap.org/data/2.5/history/city?q={city_name},NO&units=metric&type=hour&start={start_date}&end={end_date}&appid={API_KEY}" @@ -69,7 +81,12 @@ def fetch_time_data(start_date, end_date, city_name): # Gets statistical data from the API - openweathermap.org def fetch_stat_data(city_name): - + ''' + This function takes in the city_name parameter, and fetches statistical weather data from the + API - openweathermap.org. An if-statement makes sure the status code from the request is 200, which + means 'ok'. If so, it prints that the fetch is ok, and makes the data json compatible. It then returns a standard + folder for the data to be written to. If the status code is not 200, it returns no data and the status code. + ''' # f-string url, to add the "custom" variables to the API-request url = f"https://history.openweathermap.org/data/2.5/aggregated/year?q={city_name},NO&appid={API_KEY}&units=metric" @@ -94,14 +111,21 @@ def fetch_stat_data(city_name): # Write data to json-file def write_data(data, folder, filename): + ''' + This function takes in the parameters data: the json data we want to write, + folder: the place we want the file to be, and filename: what the file should be named. + It then finds the way from the script to the root of the project, and navigates to the wanted + output place, '../data/json/output_{notebook}'. If the folder does not exist it will be created. + Then the json-data is written to a json-file, inside the right folder with the chosen filename. 
+ ''' # Ensure the 'output_stedsdata' folder exists inside the 'data' folder at the root of the project script_dir = os.path.dirname(os.path.abspath(__file__)) # Get the directory of the script project_root = os.path.abspath(os.path.join(script_dir, os.pardir, os.pardir)) # Navigate to the root of the project data_dir = os.path.join(project_root, 'data', folder) - os.makedirs(data_dir, exist_ok=True) # Creates 'data/output_stedsdata' folder if it doesn't exist + os.makedirs(data_dir, exist_ok=True) # Creates 'data/output_{notebook}' folder if it doesn't exist - # Write the JSON data to a file inside the 'output_stedsdata' folder - file_path = os.path.join(data_dir, f'data_{filename}.json') # Creates 'data/output_stedsdata/data_{filename}.json' + # Write the JSON data to a file inside the 'output_{notebook}' folder + file_path = os.path.join(data_dir, f'data_{filename}.json') # Creates 'data/json/output_{notebook}/data_{filename}.json' # Opens and write the data to a json file with open(file_path, 'w') as json_file: @@ -113,6 +137,16 @@ def write_data(data, folder, filename): # Function to 'normalize' the dataset, with index-changing and dropping meta-data def extract_city_df(weather_data): + ''' + This is a function taking in the parameter weather_data. This is the json-data written to a file. + This function will check for the 'list' in weather_data, because that is where the data is stored; + the data stored above is just meta-data we don't need. Then the data is stored like a dataframe, + and normalized using json_normalize, for better readability and flat data structure. + Drop_duplicates checks if there are any duplicates of the column 'dt', meaning the time. + All the data can be the same, but one time should only appear once. Then the function converts + the dt to unix timestamp (easier to read), and makes it the index of the dataframe. + If the 'list' is not in the data, the function will return None. 
+ ''' if 'list' in weather_data: # Normalize the json for better readability df = pd.json_normalize(weather_data['list']) @@ -133,6 +167,13 @@ def extract_city_df(weather_data): # Function to 'normalize' the dataset for statistic-data, with index-changing and dropping meta-data def extract_city_data_stat(data): + ''' + This is a function taking in the parameter data. This is statistical json-data written to a file. + This function will check for the 'result' in data, because that is where the data is stored; + the data stored above is just meta-data we don't need. Then the data is stored like a dataframe, + and normalized using json_normalize, for better readability and flat data structure. + If the 'result' is not in the data, the function will return None. + ''' # Checks if the 'result' column is in the data if 'result' in data: # Normalize the json and store it as a dataframe for better readability @@ -142,4 +183,12 @@ def extract_city_data_stat(data): return df else: print("'result' not in data") - return None \ No newline at end of file + return None + +# This prints the documentation for the functions written inside '''these''' +print(fetch_current_data.__doc__) +print(fetch_time_data.__doc__) +print(fetch_stat_data.__doc__) +print(write_data.__doc__) +print(extract_city_df.__doc__) +print(extract_city_data_stat.__doc__) \ No newline at end of file