python
1 day, 6 hours ago
import time
from datetime import datetime
from decimal import Decimal

import numpy as np
import pandas as pd
import pytest
# Mock constants (replace with the actual constants used by the methods under test).
# Each name is a dictionary key / label shared by the mock builders and the tests.
COLUMNS = "columns"
TABLE = "table"
ANALYSIS = "analysis"
NUMERICAL_COLUMNS = "numerical_columns"
CATEGORICAL_COLUMNS = "categorical_columns"
DATETIME_COLUMNS = "datetime_columns"
RECOMMENDED_RESOLUTION = "recommended_resolution"
USE_FOR_ANOMALY = "use_for_anomaly"
DATE_START = "date_start"
DATE_END = "date_end"
LONGTEXT_COLUMNS = "longtext_columns"
CORRELATIONS = "correlations"
PROFILER_CATEGORIES = ["mean", "std_dev", "p_distinct"]
PHI_K = "phi_k"
METRIC_TIME = "METRIC_TIME"
WORD_COUNTS = "word_counts"
OTHERS = "others"
TYPE = "type"
# TYPES is used as a key by the mock builders in this file; without a
# definition they fail with NameError before any test can run.
# NOTE(review): the value "types" is assumed — confirm against the real constants.
TYPES = "types"
NUMERIC = "numeric"
CATEGORICAL = "categorical"
DATETIME = "datetime"
P_DISTINCT = "p_distinct"
# Sample utility functions for mocking data
def mock_profiler_output():
    """Build a small profiler-style report used as test input.

    The report holds statistics for two columns ("column1", "column2"),
    a table section counting two numeric and zero categorical columns, and
    a single phi-k correlation entry between the two columns.
    """
    per_column_stats = {
        "column1": {"mean": 10, "std_dev": 5, "p_distinct": 0.1},
        "column2": {"mean": 15, "std_dev": 7, "p_distinct": 0.2},
    }
    report = {"variables": per_column_stats}
    report[TABLE] = {TYPES: {NUMERIC: 2, CATEGORICAL: 0}}
    report[CORRELATIONS] = {PHI_K: [{"column1": {"column2": 0.9}}]}
    return report
def mock_custom_dict():
    """Build an empty custom-dict skeleton used as test input.

    The skeleton has no columns, zeroed numeric/categorical/datetime type
    counts, and an analysis section whose column lists are all empty.
    A fresh structure is created on every call.
    """
    zeroed_type_counts = dict.fromkeys((NUMERIC, CATEGORICAL, DATETIME), 0)
    analysis_keys = (
        NUMERICAL_COLUMNS,
        CATEGORICAL_COLUMNS,
        DATETIME_COLUMNS,
        USE_FOR_ANOMALY,
    )
    skeleton = {COLUMNS: {}}
    skeleton[TABLE] = {TYPES: zeroed_type_counts}
    # A comprehension (not dict.fromkeys) so each key gets its own list.
    skeleton[ANALYSIS] = {key: [] for key in analysis_keys}
    return skeleton
# Test cases
def test_create_base_custom_dict():
    """Check the dict returned by create_base_custom_dict for the mock inputs.

    Asserts that column1's mean is carried into the columns section, that
    "timestamp" is the only datetime column, and that the start/end dates
    are stored in the analysis section.
    """
    start, end = "2022-01-01", "2023-01-01"

    result = create_base_custom_dict(
        mock_profiler_output(),
        mock_custom_dict(),
        start,
        end,
        "timestamp",
        ["long_text_col"],
    )

    assert result[COLUMNS]["column1"]["mean"] == 10
    analysis = result[ANALYSIS]
    assert analysis[DATETIME_COLUMNS] == ["timestamp"]
    assert analysis[DATE_START] == start
    assert analysis[DATE_END] == end
def test_convert_column_to_float():
    """Check that 10 converts to a value equal to 10.0 and None converts to NaN."""
    from_int = convert_column_to_float(10)
    assert from_int == 10.0

    from_none = convert_column_to_float(None)
    assert np.isnan(from_none)
def test_check_if_epoch():
    """Check that check_if_epoch returns True for three epoch-second samples.

    The samples are the current time, the current time minus 315569260
    seconds (about ten years), and the fixed value 1234567890
    (a timestamp in February 2009).

    Relies on the module-level ``import time``.
    """
    now = int(time.time())
    samples = (
        ("current time", now),
        ("about ten years ago", now - 315569260),
        ("fixed 2009 timestamp", 1234567890),
    )
    for label, seconds in samples:
        # The label in the message identifies which sample was rejected.
        assert check_if_epoch(seconds) is True, (
            f"{label} ({seconds}) was not recognised as an epoch value"
        )
def test_replace_period_with_hyphen():
    """Check that a dotted word-count key shows up as "a_b" after the rewrite."""
    custom_dict = mock_custom_dict()
    dotted_counts = {"a.b": 10, "c.d": 20, OTHERS: 0}
    custom_dict[COLUMNS] = {"column1": {WORD_COUNTS: dotted_counts}}

    result = replace_period_with_hyphen(custom_dict)

    rewritten_counts = result[COLUMNS]["column1"][WORD_COUNTS]
    # NOTE(review): the function name says "hyphen" but the expected key uses
    # an underscore — confirm which separator the implementation produces.
    assert "a_b" in rewritten_counts
def test_numerical_to_categorical():
    """Check that a numerical column with P_DISTINCT 0.0 is moved to the categorical list."""
    column = "num_col"
    custom_dict = mock_custom_dict()
    custom_dict[COLUMNS] = {column: {P_DISTINCT: 0.0}}
    custom_dict[ANALYSIS][NUMERICAL_COLUMNS].append(column)

    result = numerical_to_categorical(custom_dict)

    assert column in result[ANALYSIS][CATEGORICAL_COLUMNS]
    assert column not in result[ANALYSIS][NUMERICAL_COLUMNS]
def test_update_recommended_resolution():
    """Check that update_recommended_resolution returns an accepted frequency code.

    The input frame has a single column, keyed by the METRIC_TIME constant
    so the test follows the constant if its value is replaced, holding three
    increasing integer values.
    """
    df = pd.DataFrame({METRIC_TIME: [1_000_000, 2_000_000, 4_000_000]})

    frequency = update_recommended_resolution(df)

    # Expected valid frequencies.
    assert frequency in ("5M", "1D"), f"unexpected frequency: {frequency!r}"
def test_create_mongo_dict():
    """Check the "_id", "requestId", "datasetId" and "uuid" fields of the mongo dict."""
    request_id, dataset_id, uuid_value = "req1", "data1", "uuid1"

    result = create_mongo_dict(request_id, dataset_id, uuid_value)

    expected_fields = (
        ("_id", "req1-data1"),
        ("requestId", request_id),
        ("datasetId", dataset_id),
        ("uuid", uuid_value),
    )
    for field, expected in expected_fields:
        assert result[field] == expected
0 Comments
Please Login to Comment Here