python
14 hours, 7 minutes ago
# test_embedding_service.py
import datetime
import unittest
from unittest.mock import ANY, Mock, patch

import pandas as pd

from embedding_service import EmbeddingService, split_list
from modelclasses.populate_embedding_data import PopulateEmbeddingData
class TestEmbeddingService(unittest.TestCase):
    """Unit tests for ``EmbeddingService`` and the ``split_list`` helper.

    Every collaborator handed to the service is a ``Mock``, so no database,
    e-mail or embedding backend is contacted by these tests.
    """

    def setUp(self):
        """Build an ``EmbeddingService`` wired to mocked dependencies."""
        config = {
            "email": {"embeddings-task-recipients": ["test@example.com"]},
        }
        self.embeddings_util = Mock()
        self.pg_service = Mock()
        self.audit_service = Mock()
        self.config_service = Mock()
        self.oracle_service = Mock()
        self.email_service = Mock()

        self.embedding_service = EmbeddingService(
            config,
            self.embeddings_util,
            self.pg_service,
            self.audit_service,
            self.config_service,
            self.oracle_service,
            self.email_service,
        )

    def test_split_list(self):
        """``split_list`` chunks evenly and tolerates an oversized chunk size."""
        input_list = ["str1", "str2", "str3", "str4"]

        self.assertEqual(
            split_list(input_list, 2),
            [["str1", "str2"], ["str3", "str4"]],
        )

        # Edge case: the chunk size exceeds the input length, so the whole
        # input is expected back as a single chunk.
        self.assertEqual(split_list(input_list, 5), [input_list])

    def test_executor_with_texts(self):
        """``executor`` forwards a payload that carries texts for processing."""
        embedding_data = PopulateEmbeddingData(
            namespace="CALL_CENTER",
            model="textembedding-gecko@003",
            texts=["text1", "text2"],
        )

        with patch.object(
            self.embedding_service, "process_texts_for_embedding"
        ) as mock_process_texts:
            self.embedding_service.executor(embedding_data)

        # The trace id is not supplied by this test, and every collaborator is
        # a Mock, so its concrete value cannot be predicted here; match any
        # value in that position instead of a hard-coded string.
        mock_process_texts.assert_called_once_with(embedding_data, ANY)

    def test_executor_without_texts(self):
        """``executor`` still reaches text processing when no texts are given."""
        embedding_data = PopulateEmbeddingData(
            namespace="CALL_CENTER",
            model="textembedding-gecko@003",
        )

        # Query list served by the mocked config service.
        # NOTE(review): str() produces a Python-literal repr (single quotes),
        # not JSON - confirm this matches how the service parses the value.
        query_dicts = [{
            "IS_ACTIVE": "Y",
            "NAME": "dimension1",
            "QUERY": "SELECT * FROM SOME_TABLE",
        }]
        self.config_service.get_config_clob.return_value = str(query_dicts)

        # Result returned by the mocked Oracle service.
        self.oracle_service.execute_select_query.return_value = (
            '[["result1"], ["result2"]]'
        )

        with patch.object(
            self.embedding_service, "process_texts_for_embedding"
        ) as mock_process_texts:
            self.embedding_service.executor(embedding_data)

        mock_process_texts.assert_called()

    def test_process_texts_for_embedding(self):
        """Duplicate texts are dropped and the audit entry is updated."""
        embedding_data = PopulateEmbeddingData(
            namespace="CALL_CENTER",
            model="textembedding-gecko@003",
            texts=["text1", "text2", "text2"],
        )
        trace_id = "12345"

        with patch.object(
            self.embedding_service, "get_embedding_and_store"
        ) as mock_get_embedding_and_store:
            self.embedding_service.process_texts_for_embedding(
                embedding_data, trace_id
            )

        # The audit entry must be updated exactly once with these arguments.
        self.audit_service.update_audit_entry.assert_called_once_with(
            trace_id, ['STATUS'], ["'Processing'"]
        )

        # The duplicate "text2" must be gone by the time the texts are chunked.
        expected_split_list = [["text1", "text2"]]
        mock_get_embedding_and_store.assert_called_once_with(
            embedding_data, trace_id, expected_split_list
        )

    def test_get_embedding_and_store(self):
        """New texts are embedded and inserted, then a summary mail is sent."""
        embedding_data = PopulateEmbeddingData(
            namespace="CALL_CENTER",
            model="textembedding-gecko@003",
            texts=["text1", "text2"],
        )
        trace_id = "12345"
        split_list_of_strings = [["text1"], ["text2"]]

        self.config_service.get_config_value.return_value = "test_model"

        # Two consecutive select results: nothing, then "text1" reported as
        # already present.
        self.pg_service.execute_select_query.side_effect = [None, '[["text1"]]']

        # Embedding produced by the mocked utility for the remaining text.
        self.embeddings_util.get_embeddings.return_value = pd.DataFrame({
            "Data": ["text2"],
            "Embedding": [[0.1, 0.2]],
        })

        # Replace the service module's logger for the duration of the call.
        with patch("embedding_service.logger"):
            self.embedding_service.get_embedding_and_store(
                embedding_data, trace_id, split_list_of_strings
            )

        # The last column is a timestamp generated inside the service; a fresh
        # datetime.now() taken here can never be relied on to equal it, so
        # that slot is matched with ANY.
        # NOTE(review): the original expectation was a single flat tuple (the
        # outer parentheses had no trailing comma); that shape is retained -
        # confirm whether the service passes one row or a sequence of rows.
        expected_embeddings = (
            "text2", [0.1, 0.2], "CALL_CENTER", "test_model", ANY
        )
        self.pg_service.insert_data.assert_called_once_with(
            "INSERT INTO text_embeddings_lookup VALUES %s", expected_embeddings
        )

        # The summary mail must report one existing and one new embedding.
        # The lines below are intentionally flush-left: whitespace inside the
        # literal is part of the compared value.
        # NOTE(review): confirm the whitespace matches the service's template.
        body = f'''
<p>Total Intents processed : {len(embedding_data.texts)}</p>
<p>Already Embedded and present in DB: 1</p>
<p>Newly Embedded Intents: 1</p>
'''
        self.email_service.send_mail.assert_called_once_with(
            self.embedding_service.recipients,
            'Get Embeddings Task finished',
            body,
            None,
        )
0 Comments
Please Login to Comment Here