python
1 day, 5 hours ago
import unittest
from unittest.mock import MagicMock
import pandas as pd
import numpy as np
import bson
from datetime import datetime
from unittest.mock import patch
class TestAnalysisMethods(unittest.TestCase):
    """Unit tests for the column-analysis helper functions.

    NOTE(review): the functions exercised here (``convert_df_obj_columns``,
    ``fill_analysis``, ``reduce_word_counts_size``, ...) are not imported in
    the visible part of this file. They must be imported from the module
    under test before these tests can run -- TODO confirm the module path.
    """

    def setUp(self):
        """Build a fresh profiling dict and raw query rows for every test."""
        # Rebuilt per test, so tests that mutate ``custom_dict`` stay isolated.
        self.custom_dict = {
            "COLUMNS": {
                "col1": {
                    "type": "Categorical",
                    "is_unique": False,
                    "n_distinct": 3,
                    "p_distinct": 0.5,
                },
                "col2": {"type": "Numerical", "is_unique": False, "n_distinct": 5},
                "col3": {"type": "Datetime", "is_unique": True, "n_distinct": 1},
                "col4": {
                    "type": "Categorical",
                    "is_unique": False,
                    "n_distinct": 10,
                    "p_distinct": 0.91,
                },
            },
            "ANALYSIS": {
                "CATEGORICAL_COLUMNS": [],
                "NUMERICAL_COLUMNS": [],
                "DATETIME_COLUMNS": [],
                "USE_FOR_ANOMALY": [],
            },
            "TABLE": {
                "TYPES": {"NUMERIC": 0, "DATETIME": 0},
            },
            "CORRELATIONS": {"PHI_K": [{"col1": {"col2": 1.0}}]},
        }
        # Rows mix BSON wrapper types, ISO date strings and missing values.
        self.query_results = [
            {
                "col1": bson.Decimal128("123.45"),
                "col2": bson.int64.Int64(100),
                "col3": "2024-01-01T00:00:00",
            },
            {
                "col1": bson.Decimal128("67.89"),
                "col2": bson.int64.Int64(200),
                "col3": None,
            },
            {
                "col1": None,
                "col2": bson.int64.Int64(300),
                "col3": "2024-02-01T00:00:00",
            },
        ]

    # TODO: replace `module_name` with the actual module name; the patch must
    # target the module where `update_recommended_resolution` is looked up.
    @patch("module_name.update_recommended_resolution")
    def test_convert_df_obj_columns(self, mock_update_recommended_resolution):
        """Converted frame has float/int/datetime columns and string stats."""
        mock_update_recommended_resolution.return_value = "resolution_mock"

        df, resolution, stats = convert_df_obj_columns(self.query_results)

        # DataFrame content and dtypes.
        self.assertIsInstance(df, pd.DataFrame)
        self.assertTrue(pd.api.types.is_float_dtype(df["col1"]))
        self.assertTrue(pd.api.types.is_integer_dtype(df["col2"]))
        self.assertTrue(pd.api.types.is_datetime64_any_dtype(df["col3"]))

        # Specific values.
        self.assertAlmostEqual(df["col1"].iloc[0], 123.45, places=2)
        self.assertEqual(df["col2"].iloc[1], 200)
        self.assertEqual(df["col3"].iloc[2], pd.Timestamp("2024-02-01"))

        # Resolution is whatever the patched helper returned.
        self.assertEqual(resolution, "resolution_mock")

        # Every stats entry must carry the expected keys and a "str" dtype.
        self.assertIsInstance(stats, list)
        for stat in stats:
            # subTest reports each failing entry instead of stopping at the first.
            with self.subTest(stat=stat):
                self.assertIn("col_name", stat)
                self.assertIn("mean_len", stat)
                self.assertIn("dtype", stat)
                self.assertEqual(stat["dtype"], "str")

    def test_empty_query_results(self):
        """Empty input yields an empty frame, no resolution and no stats."""
        # NOTE(review): unlike test_convert_df_obj_columns this test does not
        # patch update_recommended_resolution -- confirm that is intended.
        df, resolution, stats = convert_df_obj_columns([])

        self.assertTrue(df.empty)
        self.assertIsNone(resolution)
        self.assertEqual(stats, [])

    def test_fill_analysis(self):
        """fill_analysis classifies col1/col2, drops col3, counts one datetime."""
        result = fill_analysis(self.custom_dict, "col3", "resolution")

        self.assertIn("col1", result["ANALYSIS"]["CATEGORICAL_COLUMNS"])
        self.assertIn("col2", result["ANALYSIS"]["NUMERICAL_COLUMNS"])
        self.assertNotIn("col3", result["COLUMNS"])
        self.assertEqual(result["TABLE"]["TYPES"]["DATETIME"], 1)

    def test_reduce_word_counts_size(self):
        """reduce_word_counts_size leaves an ``others`` count of 0 in place."""
        # NOTE(review): the fixture already sets "others" to 0, so this only
        # verifies that the key survives with that value.
        self.custom_dict["COLUMNS"]["col1"]["word_counts"] = {
            "word1": 5,
            "word2": 3,
            "others": 0,
        }

        result = reduce_word_counts_size(self.custom_dict)

        self.assertEqual(result["COLUMNS"]["col1"]["word_counts"]["others"], 0)

    def test_fill_use_for_anomaly(self):
        """fill_use_for_anomaly lists col1 under USE_FOR_ANOMALY."""
        result = fill_use_for_anomaly(self.custom_dict)

        self.assertIn("col1", result["ANALYSIS"]["USE_FOR_ANOMALY"])

    def test_use_for_anomaly_column(self):
        """col1 is accepted for anomaly detection; col3 is rejected."""
        self.assertTrue(use_for_anomaly_column(self.custom_dict, "col1"))
        self.assertFalse(use_for_anomaly_column(self.custom_dict, "col3"))

    def test_is_unique(self):
        """is_unique is False for col1 and True for col3 in the fixture."""
        self.assertFalse(is_unique(self.custom_dict, "col1"))
        self.assertTrue(is_unique(self.custom_dict, "col3"))

    def test_n_distinct_is_1(self):
        """n_distinct_is_1 is True only for col3 (n_distinct == 1)."""
        self.assertFalse(n_distinct_is_1(self.custom_dict, "col1"))
        self.assertTrue(n_distinct_is_1(self.custom_dict, "col3"))

    def test_is_categorical(self):
        """is_categorical is True for a column added to CATEGORICAL_COLUMNS."""
        self.custom_dict["ANALYSIS"]["CATEGORICAL_COLUMNS"].append("col1")

        self.assertTrue(is_categorical(self.custom_dict, "col1"))
        self.assertFalse(is_categorical(self.custom_dict, "col2"))

    def test_is_numerical(self):
        """is_numerical is True for a column added to NUMERICAL_COLUMNS."""
        self.custom_dict["ANALYSIS"]["NUMERICAL_COLUMNS"].append("col2")

        self.assertTrue(is_numerical(self.custom_dict, "col2"))
        self.assertFalse(is_numerical(self.custom_dict, "col1"))

    def test_convert_percent_to_p(self):
        """convert_percent_to_p exposes "5%"/"25%"/"95%" as p5/p25/p95 keys."""
        self.custom_dict["COLUMNS"]["col1"].update({"5%": 5, "25%": 25, "95%": 95})

        result = convert_percent_to_p(self.custom_dict)

        self.assertIn("p5", result["COLUMNS"]["col1"])
        self.assertIn("p25", result["COLUMNS"]["col1"])
        self.assertIn("p95", result["COLUMNS"]["col1"])
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
0 Comments
Please Login to Comment Here