Start generating pytest tests for the training service
services/training/tests/test_performance.py (new file, 630 lines)
@@ -0,0 +1,630 @@
# ================================================================
# services/training/tests/test_performance.py
# ================================================================
"""
Performance and Load Testing for Training Service
Tests training performance with real-world data volumes
"""

import pytest
import asyncio
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import psutil
import gc
from typing import List, Dict, Any
import logging
from unittest.mock import patch  # used by test_training_service_throughput

from app.ml.trainer import BakeryMLTrainer
from app.ml.data_processor import BakeryDataProcessor
from app.services.training_service import TrainingService


class TestTrainingPerformance:
    """Performance tests for training service components"""

    @pytest.fixture
    def large_sales_dataset(self):
        """Generate large dataset for performance testing (2 years of data)"""
        start_date = datetime(2022, 1, 1)
        end_date = datetime(2024, 1, 1)

        date_range = pd.date_range(start=start_date, end=end_date, freq='D')
        products = [
            "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
            "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
            "Donuts", "Berlinas", "Napolitanas", "Ensaimadas"
        ]

        data = []
        for date in date_range:
            for product in products:
                # Realistic sales simulation
                base_quantity = np.random.randint(5, 150)

                # Seasonal patterns
                if date.month in [12, 1]:  # Winter/Holiday season
                    base_quantity *= 1.4
                elif date.month in [6, 7, 8]:  # Summer
                    base_quantity *= 0.8

                # Weekly patterns
                if date.weekday() >= 5:  # Weekends
                    base_quantity *= 1.2
                elif date.weekday() == 0:  # Monday
                    base_quantity *= 0.7

                # Add noise
                quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1)))

                data.append({
                    "date": date.strftime("%Y-%m-%d"),
                    "product": product,
                    "quantity": quantity,
                    "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2),
                    "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1),
                    "precipitation": max(0, np.random.exponential(0.8)),
                    "is_weekend": date.weekday() >= 5,
                    "is_holiday": self._is_spanish_holiday(date)
                })

        return pd.DataFrame(data)

    def _is_spanish_holiday(self, date: datetime) -> bool:
        """Check if date is a Spanish holiday"""
        holidays = [
            (1, 1),    # New Year
            (1, 6),    # Epiphany
            (5, 1),    # Labor Day
            (8, 15),   # Assumption
            (10, 12),  # National Day
            (11, 1),   # All Saints
            (12, 6),   # Constitution Day
            (12, 8),   # Immaculate Conception
            (12, 25),  # Christmas
        ]
        return (date.month, date.day) in holidays

    @pytest.mark.asyncio
    async def test_single_product_training_performance(self, large_sales_dataset):
        """Test performance of single product training with large dataset"""

        trainer = BakeryMLTrainer()
        product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()

        # Measure memory before training
        process = psutil.Process()
        memory_before = process.memory_info().rss / 1024 / 1024  # MB

        start_time = time.time()

        result = await trainer.train_single_product(
            tenant_id="perf_test_tenant",
            product_name="Pan Integral",
            sales_data=product_data,
            config={
                "include_weather": True,
                "include_traffic": False,  # Skip traffic for performance
                "seasonality_mode": "additive"
            }
        )

        end_time = time.time()
        training_duration = end_time - start_time

        # Measure memory after training
        memory_after = process.memory_info().rss / 1024 / 1024  # MB
        memory_used = memory_after - memory_before

        # Performance assertions
        assert training_duration < 120, f"Training took too long: {training_duration:.2f}s"
        assert memory_used < 500, f"Memory usage too high: {memory_used:.2f}MB"
        assert result['status'] == 'completed'

        # Quality assertions
        metrics = result['metrics']
        assert metrics['mape'] < 50, f"MAPE too high: {metrics['mape']:.2f}%"

        print("Performance Results:")
        print(f"  Training Duration: {training_duration:.2f}s")
        print(f"  Memory Used: {memory_used:.2f}MB")
        print(f"  Data Points: {len(product_data)}")
        print(f"  MAPE: {metrics['mape']:.2f}%")
        print(f"  RMSE: {metrics['rmse']:.2f}")

    @pytest.mark.asyncio
    async def test_concurrent_training_performance(self, large_sales_dataset):
        """Test performance of concurrent training jobs"""

        trainer = BakeryMLTrainer()
        products = ["Pan Integral", "Croissant", "Magdalenas"]

        async def train_product(product_name: str):
            """Train a single product"""
            product_data = large_sales_dataset[large_sales_dataset['product'] == product_name].copy()

            start_time = time.time()
            result = await trainer.train_single_product(
                tenant_id=f"concurrent_test_{product_name.replace(' ', '_').lower()}",
                product_name=product_name,
                sales_data=product_data,
                config={"include_weather": True, "include_traffic": False}
            )
            end_time = time.time()

            return {
                'product': product_name,
                'duration': end_time - start_time,
                'status': result['status'],
                'metrics': result.get('metrics', {})
            }

        # Run concurrent training
        start_time = time.time()
        tasks = [train_product(product) for product in products]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Verify all trainings completed
        for result in results:
            assert result['status'] == 'completed'
            assert result['duration'] < 120  # Individual training time

        # Concurrent execution should be faster than sequential
        sequential_time_estimate = sum(r['duration'] for r in results)
        efficiency = sequential_time_estimate / total_time

        assert efficiency > 1.5, f"Concurrency efficiency too low: {efficiency:.2f}x"

        print("Concurrent Training Results:")
        print(f"  Total Time: {total_time:.2f}s")
        print(f"  Sequential Estimate: {sequential_time_estimate:.2f}s")
        print(f"  Efficiency: {efficiency:.2f}x")

        for result in results:
            # Format MAPE only when it is numeric; avoid applying :.2f to the 'N/A' fallback
            mape = result['metrics'].get('mape')
            mape_display = f"{mape:.2f}%" if isinstance(mape, (int, float)) else "N/A"
            print(f"  {result['product']}: {result['duration']:.2f}s, MAPE: {mape_display}")

    @pytest.mark.asyncio
    async def test_data_processing_scalability(self, large_sales_dataset):
        """Test data processing performance with increasing data sizes"""

        data_processor = BakeryDataProcessor()

        # Test with different data sizes
        data_sizes = [1000, 5000, 10000, 20000, len(large_sales_dataset)]
        performance_results = []

        for size in data_sizes:
            # Take a sample of the specified size
            sample_data = large_sales_dataset.head(size).copy()

            start_time = time.time()

            # Process the data
            processed_data = await data_processor.prepare_training_data(
                sales_data=sample_data,
                include_weather=True,
                include_traffic=True,
                tenant_id="scalability_test",
                product_name="Pan Integral"
            )

            processing_time = time.time() - start_time

            performance_results.append({
                'data_size': size,
                'processing_time': processing_time,
                'processed_rows': len(processed_data),
                'throughput': size / processing_time if processing_time > 0 else 0
            })

        # Verify linear or sub-linear scaling
        for i in range(1, len(performance_results)):
            prev_result = performance_results[i - 1]
            curr_result = performance_results[i]

            size_ratio = curr_result['data_size'] / prev_result['data_size']
            time_ratio = curr_result['processing_time'] / prev_result['processing_time']

            # Processing time should scale better than linearly
            assert time_ratio < size_ratio * 1.5, f"Poor scaling at size {curr_result['data_size']}"

        print("Data Processing Scalability Results:")
        for result in performance_results:
            print(f"  Size: {result['data_size']:,} rows, Time: {result['processing_time']:.2f}s, "
                  f"Throughput: {result['throughput']:.0f} rows/s")

    @pytest.mark.asyncio
    async def test_memory_usage_optimization(self, large_sales_dataset):
        """Test memory usage optimization during training"""

        trainer = BakeryMLTrainer()
        process = psutil.Process()

        # Baseline memory
        gc.collect()  # Force garbage collection
        baseline_memory = process.memory_info().rss / 1024 / 1024  # MB

        memory_snapshots = [{'stage': 'baseline', 'memory_mb': baseline_memory}]

        # Load data
        product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()
        current_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'data_loaded', 'memory_mb': current_memory})

        # Train model
        result = await trainer.train_single_product(
            tenant_id="memory_test_tenant",
            product_name="Pan Integral",
            sales_data=product_data,
            config={"include_weather": True, "include_traffic": True}
        )

        current_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'model_trained', 'memory_mb': current_memory})

        # Cleanup
        del product_data
        del result
        gc.collect()

        final_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'cleanup', 'memory_mb': final_memory})

        # Memory assertions
        peak_memory = max(snapshot['memory_mb'] for snapshot in memory_snapshots)
        memory_increase = peak_memory - baseline_memory
        memory_after_cleanup = final_memory - baseline_memory

        assert memory_increase < 800, f"Peak memory increase too high: {memory_increase:.2f}MB"
        assert memory_after_cleanup < 100, f"Memory not properly cleaned up: {memory_after_cleanup:.2f}MB"

        print("Memory Usage Analysis:")
        for snapshot in memory_snapshots:
            print(f"  {snapshot['stage']}: {snapshot['memory_mb']:.2f}MB")
        print(f"  Peak increase: {memory_increase:.2f}MB")
        print(f"  After cleanup: {memory_after_cleanup:.2f}MB")

    @pytest.mark.asyncio
    async def test_training_service_throughput(self, large_sales_dataset):
        """Test training service throughput with multiple requests"""

        training_service = TrainingService()

        # Simulate multiple training requests
        num_requests = 5
        products = ["Pan Integral", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate"]

        async def execute_training_request(request_id: int, product: str):
            """Execute a single training request"""
            product_data = large_sales_dataset[large_sales_dataset['product'] == product].copy()

            with patch.object(training_service, '_fetch_sales_data', return_value=product_data):
                start_time = time.time()

                result = await training_service.execute_training_job(
                    db=None,  # Mock DB session
                    tenant_id=f"throughput_test_tenant_{request_id}",
                    job_id=f"job_{request_id}_{product.replace(' ', '_').lower()}",
                    request={
                        'products': [product],
                        'include_weather': True,
                        'include_traffic': False,
                        'config': {'seasonality_mode': 'additive'}
                    }
                )

                duration = time.time() - start_time
                return {
                    'request_id': request_id,
                    'product': product,
                    'duration': duration,
                    'status': result.get('status', 'unknown'),
                    'models_trained': len(result.get('models_trained', []))
                }

        # Execute requests concurrently
        start_time = time.time()
        tasks = [
            execute_training_request(i, products[i % len(products)])
            for i in range(num_requests)
        ]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Calculate throughput metrics
        successful_requests = sum(1 for r in results if r['status'] == 'completed')
        throughput = successful_requests / total_time  # requests per second

        # Performance assertions
        assert successful_requests >= num_requests * 0.8, "Too many failed requests"
        assert throughput >= 0.1, f"Throughput too low: {throughput:.3f} req/s"
        assert total_time < 300, f"Total time too long: {total_time:.2f}s"

        print("Training Service Throughput Results:")
        print(f"  Total Requests: {num_requests}")
        print(f"  Successful: {successful_requests}")
        print(f"  Total Time: {total_time:.2f}s")
        print(f"  Throughput: {throughput:.3f} req/s")
        print(f"  Average Request Time: {total_time / num_requests:.2f}s")

    @pytest.mark.asyncio
    async def test_large_dataset_edge_cases(self, large_sales_dataset):
        """Test handling of edge cases with large datasets"""

        data_processor = BakeryDataProcessor()

        # Test 1: Dataset with many missing values
        corrupted_data = large_sales_dataset.copy()
        # Introduce 30% missing values randomly
        mask = np.random.random(len(corrupted_data)) < 0.3
        corrupted_data.loc[mask, 'quantity'] = np.nan

        start_time = time.time()
        result = await data_processor.validate_data_quality(corrupted_data)
        validation_time = time.time() - start_time

        assert validation_time < 10, f"Validation too slow: {validation_time:.2f}s"
        assert result['is_valid'] is False
        assert 'high_missing_data' in result['issues']

        # Test 2: Dataset with extreme outliers
        outlier_data = large_sales_dataset.copy()
        # Add extreme outliers (100x normal values)
        outlier_indices = np.random.choice(len(outlier_data), size=int(len(outlier_data) * 0.01), replace=False)
        outlier_data.loc[outlier_indices, 'quantity'] *= 100

        start_time = time.time()
        cleaned_data = await data_processor.clean_outliers(outlier_data)
        cleaning_time = time.time() - start_time

        assert cleaning_time < 15, f"Outlier cleaning too slow: {cleaning_time:.2f}s"
        assert len(cleaned_data) > len(outlier_data) * 0.95  # Should retain most data

        # Test 3: Very sparse data (many products with few sales)
        sparse_data = large_sales_dataset.copy()
        # Keep only 10% of data for each product randomly
        sparse_data = sparse_data.groupby('product').apply(
            lambda x: x.sample(n=max(1, int(len(x) * 0.1)))
        ).reset_index(drop=True)

        start_time = time.time()
        validation_result = await data_processor.validate_data_quality(sparse_data)
        sparse_validation_time = time.time() - start_time

        assert sparse_validation_time < 5, f"Sparse data validation too slow: {sparse_validation_time:.2f}s"

        print("Edge Case Performance Results:")
        print(f"  Corrupted data validation: {validation_time:.2f}s")
        print(f"  Outlier cleaning: {cleaning_time:.2f}s")
        print(f"  Sparse data validation: {sparse_validation_time:.2f}s")


class TestTrainingServiceLoad:
    """Load testing for training service under stress"""

    @pytest.mark.asyncio
    async def test_sustained_load_training(self, large_sales_dataset):
        """Test training service under sustained load"""

        trainer = BakeryMLTrainer()

        # Define load test parameters
        duration_minutes = 2  # Run for 2 minutes
        requests_per_minute = 3

        products = ["Pan Integral", "Croissant", "Magdalenas"]

        async def sustained_training_worker(worker_id: int, duration: float):
            """Worker that continuously submits training requests"""
            start_time = time.time()
            completed_requests = 0
            failed_requests = 0

            while time.time() - start_time < duration:
                try:
                    product = products[completed_requests % len(products)]
                    product_data = large_sales_dataset[
                        large_sales_dataset['product'] == product
                    ].copy()

                    result = await trainer.train_single_product(
                        tenant_id=f"load_test_worker_{worker_id}",
                        product_name=product,
                        sales_data=product_data,
                        config={"include_weather": False, "include_traffic": False}  # Minimal config for speed
                    )

                    if result['status'] == 'completed':
                        completed_requests += 1
                    else:
                        failed_requests += 1

                except Exception as e:
                    failed_requests += 1
                    logging.error(f"Training request failed: {e}")

                # Wait before next request
                await asyncio.sleep(60 / requests_per_minute)

            return {
                'worker_id': worker_id,
                'completed': completed_requests,
                'failed': failed_requests,
                'duration': time.time() - start_time
            }

        # Start multiple workers
        num_workers = 2
        duration_seconds = duration_minutes * 60

        start_time = time.time()
        tasks = [
            sustained_training_worker(i, duration_seconds)
            for i in range(num_workers)
        ]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Analyze results
        total_completed = sum(r['completed'] for r in results)
        total_failed = sum(r['failed'] for r in results)
        success_rate = total_completed / (total_completed + total_failed) if (total_completed + total_failed) > 0 else 0

        # Performance assertions
        assert success_rate >= 0.8, f"Success rate too low: {success_rate:.2%}"
        assert total_completed >= duration_minutes * requests_per_minute * num_workers * 0.7, "Throughput too low"

        print("Sustained Load Test Results:")
        print(f"  Duration: {total_time:.2f}s")
        print(f"  Workers: {num_workers}")
        print(f"  Completed Requests: {total_completed}")
        print(f"  Failed Requests: {total_failed}")
        print(f"  Success Rate: {success_rate:.2%}")
        print(f"  Average Throughput: {total_completed / total_time:.2f} req/s")

    @pytest.mark.asyncio
    async def test_resource_exhaustion_recovery(self, large_sales_dataset):
        """Test service recovery from resource exhaustion"""

        trainer = BakeryMLTrainer()

        # Simulate resource exhaustion by running many concurrent requests
        num_concurrent = 10  # High concurrency to stress the system

        async def resource_intensive_task(task_id: int):
            """Task designed to consume resources"""
            try:
                # Use all products to increase memory usage
                all_products_data = large_sales_dataset.copy()

                result = await trainer.train_tenant_models(
                    tenant_id=f"resource_test_{task_id}",
                    sales_data=all_products_data,
                    config={
                        "train_all_products": True,
                        "include_weather": True,
                        "include_traffic": True
                    }
                )

                return {'task_id': task_id, 'status': 'completed', 'error': None}

            except Exception as e:
                return {'task_id': task_id, 'status': 'failed', 'error': str(e)}

        # Launch all tasks simultaneously
        start_time = time.time()
        tasks = [resource_intensive_task(i) for i in range(num_concurrent)]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        duration = time.time() - start_time

        # Analyze results
        completed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'completed')
        failed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'failed')
        exceptions = sum(1 for r in results if isinstance(r, Exception))

        # The system should handle some failures gracefully
        # but should complete at least some requests
        total_processed = completed + failed + exceptions
        processing_rate = total_processed / num_concurrent

        assert processing_rate >= 0.5, f"Too many requests not processed: {processing_rate:.2%}"
        assert duration < 600, f"Recovery took too long: {duration:.2f}s"  # 10 minutes max

        print("Resource Exhaustion Test Results:")
        print(f"  Concurrent Requests: {num_concurrent}")
        print(f"  Completed: {completed}")
        print(f"  Failed: {failed}")
        print(f"  Exceptions: {exceptions}")
        print(f"  Duration: {duration:.2f}s")
        print(f"  Processing Rate: {processing_rate:.2%}")


# ================================================================
# BENCHMARK UTILITIES
# ================================================================

class PerformanceBenchmark:
    """Utility class for performance benchmarking"""

    @staticmethod
    def measure_execution_time(func):
        """Decorator to measure execution time"""
        async def wrapper(*args, **kwargs):
            start_time = time.time()
            result = await func(*args, **kwargs)
            execution_time = time.time() - start_time

            if hasattr(result, 'update') and isinstance(result, dict):
                result['execution_time'] = execution_time

            return result
        return wrapper

    @staticmethod
    def memory_profiler(func):
        """Decorator to profile memory usage"""
        async def wrapper(*args, **kwargs):
            process = psutil.Process()

            # Memory before
            gc.collect()
            memory_before = process.memory_info().rss / 1024 / 1024

            result = await func(*args, **kwargs)

            # Memory after
            memory_after = process.memory_info().rss / 1024 / 1024
            memory_used = memory_after - memory_before

            if hasattr(result, 'update') and isinstance(result, dict):
                result['memory_used_mb'] = memory_used

            return result
        return wrapper
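

# Example (illustrative sketch, not exercised by the tests above): how the
# benchmark decorators could wrap an async training call. It assumes, as the
# tests do, that BakeryMLTrainer.train_single_product returns a dict, so the
# decorators can attach 'execution_time' and 'memory_used_mb' to the result.
@PerformanceBenchmark.measure_execution_time
@PerformanceBenchmark.memory_profiler
async def _example_benchmarked_training(trainer: BakeryMLTrainer, sales_data: pd.DataFrame) -> Dict[str, Any]:
    """Illustrative helper showing the decorators in use; not collected by pytest."""
    return await trainer.train_single_product(
        tenant_id="benchmark_example_tenant",
        product_name="Pan Integral",
        sales_data=sales_data,
        config={"include_weather": False, "include_traffic": False}
    )
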
# ================================================================
# STANDALONE EXECUTION
# ================================================================

if __name__ == "__main__":
    """
    Run performance tests as standalone script
    Usage: python test_performance.py
    """
    import sys
    import os

    # Add the training service root to Python path
    training_service_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.insert(0, training_service_root)

    print("=" * 60)
    print("TRAINING SERVICE PERFORMANCE TEST SUITE")
    print("=" * 60)

    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Run performance tests
    pytest.main([
        __file__,
        "-v",
        "--tb=short",
        "-s",  # Don't capture output
        "--durations=10",  # Show 10 slowest tests
        "-m", "not slow",  # Skip slow tests unless specifically requested
    ])

    print("\n" + "=" * 60)
    print("PERFORMANCE TESTING COMPLETE")
    print("=" * 60)
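
# Note (assumption, for illustration): the -m "not slow" filter above only
# deselects tests if a "slow" marker is actually applied; none of the tests in
# this file carry it yet. A minimal sketch of how it could be wired up:
#
#   # pytest.ini
#   # [pytest]
#   # markers =
#   #     slow: long-running performance tests
#
#   # on a test method:
#   # @pytest.mark.slow
#   # @pytest.mark.asyncio
#   # async def test_sustained_load_training(self, large_sales_dataset): ...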