"""Unit tests for the data-cleaning helpers in ``src.my_package.util``."""

import unittest

import numpy as np
import pandas as pd

from src.my_package.util import (
    kelvin_to_celsius,
    ensure_rain_column,
    ensure_snow_column,
    fill_rain_column,
    fill_snow_column,
    extract_city_df,
)


class TestUtilFunctions(unittest.TestCase):
    """Exercise each cleaning helper on a small in-memory fixture."""

    def test_kelvin_to_celsius(self):
        """300 K is expected to convert to 26.85 degrees Celsius."""
        result = kelvin_to_celsius(300)

        self.assertAlmostEqual(result, 26.85, places=2)

    def test_ensure_rain_column(self):
        """A frame lacking 'rain.1h' gains that column, filled with NaN."""
        frame = pd.DataFrame({
            'temp': [300, 302, 305],
            'humidity': [80, 82, 78],
        })

        frame = ensure_rain_column(frame)

        # assertIn reports the actual column list when the check fails.
        self.assertIn('rain.1h', frame.columns)
        self.assertTrue(frame['rain.1h'].isna().all())

    def test_ensure_snow_column(self):
        """A frame lacking 'snow.1h' gains that column, filled with NaN."""
        frame = pd.DataFrame({
            'temp': [300, 302, 305],
            'humidity': [80, 82, 78],
        })

        frame = ensure_snow_column(frame)

        self.assertIn('snow.1h', frame.columns)
        self.assertTrue(frame['snow.1h'].isna().all())

    def test_fill_rain_column(self):
        """NaN entries in 'rain.1h' become 0; real readings are kept."""
        frame = pd.DataFrame({
            'temp': [300, 302, 305],
            'rain.1h': [np.nan, 1.0, np.nan],
        })

        frame = fill_rain_column(frame)

        # Compare the whole column so that a helper which zeroes every row
        # (not just the NaN ones) is caught as well.
        self.assertEqual(frame['rain.1h'].tolist(), [0, 1.0, 0])

    def test_fill_snow_column(self):
        """NaN entries in 'snow.1h' become 0; real readings are kept."""
        frame = pd.DataFrame({
            'temp': [300, 302, 305],
            'snow.1h': [np.nan, 0.5, np.nan],
        })

        frame = fill_snow_column(frame)

        # Whole-column comparison: the 0.5 measurement must survive the fill.
        self.assertEqual(frame['snow.1h'].tolist(), [0, 0.5, 0])

    def test_extract_city_df(self):
        """The extracted frame is datetime-indexed and has no 'weather' column."""
        weather_data = {
            "list": [
                {"dt": 1618245600, "temp": 290, "humidity": 85,
                 "weather": [{"description": "clear sky"}]},
                {"dt": 1618255600, "temp": 295, "humidity": 80,
                 "weather": [{"description": "cloudy"}]},
                {"dt": 1618265600, "temp": 300, "humidity": 75,
                 "weather": [{"description": "sunny"}]},
            ]
        }

        city_df = extract_city_df(weather_data)

        # 'dt' must end up as a datetime index.
        self.assertTrue(pd.api.types.is_datetime64_any_dtype(city_df.index))

        # The fixture holds three distinct 'dt' values, so exactly three rows
        # are expected.
        # NOTE(review): duplicate-'dt' removal is NOT exercised here because
        # no timestamp repeats; add a dedicated case once the intended
        # keep-first/keep-last behaviour of extract_city_df is confirmed.
        self.assertEqual(len(city_df), 3)

        # The nested 'weather' list must not survive as a column.
        self.assertNotIn('weather', city_df.columns)

        # Epoch seconds are converted to the matching timestamp.
        self.assertEqual(city_df.index[0], pd.to_datetime(1618245600, unit='s'))


if __name__ == '__main__':
    unittest.main()