Skip to content

Commit

Permalink
test - that data is clean
Browse files Browse the repository at this point in the history
  • Loading branch information
hannhegg committed Apr 19, 2025
1 parent f47456f commit 035d281
Showing 1 changed file with 103 additions and 0 deletions.
103 changes: 103 additions & 0 deletions tests/unit/test_clean_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import unittest
import pandas as pd
import numpy as np
from src.my_package.util import (
kelvin_to_celsius,
ensure_rain_column,
ensure_snow_column,
fill_rain_column,
fill_snow_column,
extract_city_df
)

class TestUtilFunctions(unittest.TestCase):
    """Unit tests for the data-cleaning helpers imported from ``src.my_package.util``."""

    # kelvin_to_celsius: 300 K is expected to convert to 26.85 degrees Celsius.
    def test_kelvin_to_celsius(self):
        kelvin_temp = 300
        expected_celsius = 26.85

        result = kelvin_to_celsius(kelvin_temp)

        # Floating-point result: compare to two decimal places, not exactly.
        self.assertAlmostEqual(result, expected_celsius, places=2)

    # ensure_rain_column: a frame without 'rain.1h' must gain that column.
    def test_ensure_rain_column(self):
        df = pd.DataFrame({
            'temp': [300, 302, 305],
            'humidity': [80, 82, 78],
        })

        df = ensure_rain_column(df)

        # assertIn (rather than assertTrue(... in ...)) names the missing
        # column and the actual columns in the failure message.
        self.assertIn('rain.1h', df.columns)
        # The newly added column is expected to hold only NaN values.
        self.assertTrue(df['rain.1h'].isna().all())

    # ensure_snow_column: a frame without 'snow.1h' must gain that column.
    def test_ensure_snow_column(self):
        df = pd.DataFrame({
            'temp': [300, 302, 305],
            'humidity': [80, 82, 78],
        })

        df = ensure_snow_column(df)

        # assertIn (rather than assertTrue(... in ...)) names the missing
        # column and the actual columns in the failure message.
        self.assertIn('snow.1h', df.columns)
        # The newly added column is expected to hold only NaN values.
        self.assertTrue(df['snow.1h'].isna().all())

    # fill_rain_column: NaN entries in 'rain.1h' must be replaced with 0.
    def test_fill_rain_column(self):
        df = pd.DataFrame({
            'temp': [300, 302, 305],
            'rain.1h': [np.nan, 1.0, np.nan],
        })

        df = fill_rain_column(df)

        # Rows 0 and 2 started out as NaN in the fixture above.
        self.assertEqual(df['rain.1h'].iloc[0], 0)
        self.assertEqual(df['rain.1h'].iloc[2], 0)

    # fill_snow_column: NaN entries in 'snow.1h' must be replaced with 0.
    def test_fill_snow_column(self):
        df = pd.DataFrame({
            'temp': [300, 302, 305],
            'snow.1h': [np.nan, 0.5, np.nan],
        })

        df = fill_snow_column(df)

        # Rows 0 and 2 started out as NaN in the fixture above.
        self.assertEqual(df['snow.1h'].iloc[0], 0)
        self.assertEqual(df['snow.1h'].iloc[2], 0)

    # extract_city_df: build a per-city DataFrame from a JSON-like payload.
    def test_extract_city_df(self):
        weather_data = {
            "list": [
                {"dt": 1618245600, "temp": 290, "humidity": 85, "weather": [{"description": "clear sky"}]},
                {"dt": 1618255600, "temp": 295, "humidity": 80, "weather": [{"description": "cloudy"}]},
                {"dt": 1618265600, "temp": 300, "humidity": 75, "weather": [{"description": "sunny"}]},
            ]
        }

        df = extract_city_df(weather_data)

        # The resulting index must have a datetime dtype.
        self.assertTrue(pd.api.types.is_datetime64_any_dtype(df.index))

        # All three fixture rows have distinct 'dt' values, so none may be lost.
        # NOTE(review): the fixture contains no duplicate 'dt' entries, so
        # duplicate removal itself is not exercised by this assertion.
        self.assertEqual(len(df), 3)

        # The nested 'weather' column must not appear in the result.
        self.assertNotIn('weather', df.columns)

        # The first index entry must equal the first 'dt' epoch (seconds)
        # converted to a timestamp.
        self.assertEqual(df.index[0], pd.to_datetime(1618245600, unit='s'))

# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 035d281

Please sign in to comment.