405 lines
18 KiB
Python
405 lines
18 KiB
Python
|
|
#!/usr/bin/env python3
"""
Updated Madrid Historical Traffic test for pytest inside Docker
Configured for June 2025 data availability (last available historical data)
"""

import pytest
import asyncio
from datetime import datetime, timedelta
from typing import List, Dict, Any  # NOTE(review): currently unused in this module; kept for compatibility

# Import from the actual service
from app.external.madrid_opendata import MadridOpenDataClient
from app.core.config import settings  # NOTE(review): imported but not referenced here — confirm before removing

import structlog

# Configure pytest for async: every test coroutine in this module runs under
# pytest-asyncio without needing a per-test marker.
pytestmark = pytest.mark.asyncio

# Use actual logger
logger = structlog.get_logger()
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestMadridTrafficInside:
    """Test class for Madrid traffic functionality inside Docker.

    Exercises ``MadridOpenDataClient.get_historical_traffic`` against June 2025
    date ranges (the last month with published historical ZIP data) and checks
    both record structure and plausibility of the returned values.
    Records are assumed to be dicts with at least the keys asserted below
    ('date', 'traffic_volume', 'congestion_level', 'average_speed', 'source').
    """

    @pytest.fixture
    def client(self):
        """Create Madrid client for testing"""
        return MadridOpenDataClient()

    @pytest.fixture
    def madrid_coords(self):
        """Madrid center coordinates"""
        return 40.4168, -3.7038

    @pytest.fixture
    def june_2025_dates(self):
        """Date ranges for June 2025 (last available historical data)"""
        return {
            "quick": {
                "start": datetime(2025, 6, 1, 0, 0),
                "end": datetime(2025, 6, 1, 6, 0)  # 6 hours on June 1st
            },
            "one_day": {
                "start": datetime(2025, 6, 15, 0, 0),  # Mid-June
                "end": datetime(2025, 6, 16, 0, 0)  # One full day
            },
            "three_days": {
                "start": datetime(2025, 6, 10, 0, 0),
                "end": datetime(2025, 6, 13, 0, 0)  # 3 days in June
            },
            "recent_synthetic": {
                # NOTE: datetime.now() is evaluated twice, so "end" is a few
                # microseconds after "start" + 6h — harmless for these tests.
                "start": datetime.now() - timedelta(hours=6),
                "end": datetime.now()  # Recent data (will be synthetic)
            }
        }

    async def test_quick_historical_traffic_june2025(self, client, madrid_coords, june_2025_dates):
        """Test quick historical traffic data from June 2025"""
        lat, lon = madrid_coords
        date_range = june_2025_dates["quick"]
        start_time = date_range["start"]
        end_time = date_range["end"]

        print(f"\n=== Quick Test (June 2025 - 6 hours) ===")
        print(f"Location: {lat}, {lon}")
        print(f"Date range: {start_time.strftime('%Y-%m-%d %H:%M')} to {end_time.strftime('%Y-%m-%d %H:%M')}")
        print(f"Note: Testing with June 2025 data (last available historical month)")

        # Test the function
        execution_start = datetime.now()
        result = await client.get_historical_traffic(lat, lon, start_time, end_time)
        execution_time = (datetime.now() - execution_start).total_seconds()

        print(f"⏱️ Execution time: {execution_time:.2f} seconds")
        print(f"📊 Records returned: {len(result)}")

        # Assertions
        assert isinstance(result, list), "Result should be a list"
        assert len(result) > 0, "Should return at least some records"
        assert execution_time < 30, "Should execute in reasonable time (allowing for ZIP download)"

        # Check first record structure
        if result:
            sample = result[0]
            print(f"📋 Sample record keys: {list(sample.keys())}")
            print(f"📡 Data source: {sample.get('source', 'unknown')}")

            # Required fields
            required_fields = ['date', 'traffic_volume', 'congestion_level', 'average_speed', 'source']
            for field in required_fields:
                assert field in sample, f"Missing required field: {field}"

            # Data validation
            assert isinstance(sample['traffic_volume'], int), "Traffic volume should be int"
            assert 0 <= sample['traffic_volume'] <= 1000, "Traffic volume should be reasonable"
            assert sample['congestion_level'] in ['low', 'medium', 'high', 'blocked'], "Invalid congestion level"
            assert 5 <= sample['average_speed'] <= 100, "Speed should be reasonable"
            assert isinstance(sample['date'], datetime), "Date should be datetime object"

            # Check if we got real Madrid data or synthetic
            if sample['source'] == 'madrid_opendata_zip':
                print(f"🎉 SUCCESS: Got real Madrid historical data from ZIP!")
            else:
                print(f"ℹ️ Got synthetic data (real data may not be available)")

        print(f"✅ All validations passed")

    async def test_one_day_june2025(self, client, madrid_coords, june_2025_dates):
        """Test one day of June 2025 historical traffic data"""
        lat, lon = madrid_coords
        date_range = june_2025_dates["one_day"]
        start_time = date_range["start"]
        end_time = date_range["end"]

        print(f"\n=== One Day Test (June 15, 2025) ===")
        print(f"Date range: {start_time.strftime('%Y-%m-%d %H:%M')} to {end_time.strftime('%Y-%m-%d %H:%M')}")

        result = await client.get_historical_traffic(lat, lon, start_time, end_time)

        print(f"📊 Records returned: {len(result)}")

        # Should have roughly 24 records (one per hour)
        assert len(result) >= 20, "Should have at least 20 hourly records for one day"
        assert len(result) <= 30, "Should not have more than 30 records for one day"

        # Check data source
        if result:
            sources = set(r['source'] for r in result)
            print(f"📡 Data sources: {', '.join(sources)}")

            # If we got real data, check for realistic measurement point IDs
            real_data_records = [r for r in result if r['source'] == 'madrid_opendata_zip']
            if real_data_records:
                point_ids = set(r['measurement_point_id'] for r in real_data_records)
                print(f"🏷️ Real measurement points found: {len(point_ids)}")
                print(f" Sample IDs: {list(point_ids)[:3]}")

        # Check traffic patterns
        if len(result) >= 24:
            # Find rush hour records (7-9 AM, 6-8 PM)
            rush_hour_records = [r for r in result if 7 <= r['date'].hour <= 9 or 18 <= r['date'].hour <= 20]
            night_records = [r for r in result if r['date'].hour <= 6 or r['date'].hour >= 22]

            if rush_hour_records and night_records:
                avg_rush_traffic = sum(r['traffic_volume'] for r in rush_hour_records) / len(rush_hour_records)
                avg_night_traffic = sum(r['traffic_volume'] for r in night_records) / len(night_records)

                print(f"📈 Rush hour avg traffic: {avg_rush_traffic:.1f}")
                print(f"🌙 Night avg traffic: {avg_night_traffic:.1f}")

                # Rush hour should typically have more traffic than night;
                # this is only reported, not asserted, since synthetic data
                # may not reproduce the pattern.
                if avg_rush_traffic > avg_night_traffic:
                    print(f"✅ Traffic patterns look realistic")
                else:
                    print(f"⚠️ Traffic patterns unusual (not necessarily wrong)")

    async def test_three_days_june2025(self, client, madrid_coords, june_2025_dates):
        """Test three days of June 2025 historical traffic data"""
        lat, lon = madrid_coords
        date_range = june_2025_dates["three_days"]
        start_time = date_range["start"]
        end_time = date_range["end"]

        print(f"\n=== Three Days Test (June 10-13, 2025) ===")
        print(f"Date range: {start_time.strftime('%Y-%m-%d')} to {end_time.strftime('%Y-%m-%d')}")

        result = await client.get_historical_traffic(lat, lon, start_time, end_time)

        print(f"📊 Records returned: {len(result)}")

        # Should have roughly 72 records (24 hours * 3 days)
        assert len(result) >= 60, "Should have at least 60 records for 3 days"
        assert len(result) <= 90, "Should not have more than 90 records for 3 days"

        # Check data sources
        sources = set(r['source'] for r in result)
        print(f"📡 Data sources: {', '.join(sources)}")

        # Calculate statistics
        traffic_volumes = [r['traffic_volume'] for r in result]
        speeds = [r['average_speed'] for r in result]

        avg_traffic = sum(traffic_volumes) / len(traffic_volumes)
        max_traffic = max(traffic_volumes)
        min_traffic = min(traffic_volumes)
        avg_speed = sum(speeds) / len(speeds)

        print(f"📈 Statistics:")
        print(f" Average traffic: {avg_traffic:.1f}")
        print(f" Max traffic: {max_traffic}")
        print(f" Min traffic: {min_traffic}")
        print(f" Average speed: {avg_speed:.1f} km/h")

        # Analyze by data source
        real_data_records = [r for r in result if r['source'] == 'madrid_opendata_zip']
        synthetic_records = [r for r in result if r['source'] != 'madrid_opendata_zip']

        print(f"🔍 Data breakdown:")
        print(f" Real Madrid data: {len(real_data_records)} records")
        print(f" Synthetic data: {len(synthetic_records)} records")

        if real_data_records:
            # Show measurement points from real data
            real_points = set(r['measurement_point_id'] for r in real_data_records)
            print(f" Real measurement points: {len(real_points)}")

        # Sanity checks
        assert 10 <= avg_traffic <= 500, "Average traffic should be reasonable"
        assert 10 <= avg_speed <= 60, "Average speed should be reasonable"
        assert max_traffic >= avg_traffic, "Max should be >= average"
        assert min_traffic <= avg_traffic, "Min should be <= average"

    async def test_recent_vs_historical_data(self, client, madrid_coords, june_2025_dates):
        """Compare recent data (synthetic) vs June 2025 data (potentially real)"""
        lat, lon = madrid_coords

        print(f"\n=== Recent vs Historical Data Comparison ===")

        # Test recent data (should be synthetic)
        recent_range = june_2025_dates["recent_synthetic"]
        recent_result = await client.get_historical_traffic(
            lat, lon, recent_range["start"], recent_range["end"]
        )

        # Test June 2025 data (potentially real)
        june_range = june_2025_dates["quick"]
        june_result = await client.get_historical_traffic(
            lat, lon, june_range["start"], june_range["end"]
        )

        print(f"📊 Recent data: {len(recent_result)} records")
        print(f"📊 June 2025 data: {len(june_result)} records")

        if recent_result:
            recent_sources = set(r['source'] for r in recent_result)
            print(f"📡 Recent sources: {', '.join(recent_sources)}")

        if june_result:
            june_sources = set(r['source'] for r in june_result)
            print(f"📡 June sources: {', '.join(june_sources)}")

            # Check if we successfully got real data from June.
            # NOTE(review): this check must stay inside the `if june_result:`
            # branch — `june_sources` is only bound when records came back.
            if 'madrid_opendata_zip' in june_sources:
                print(f"🎉 SUCCESS: Real Madrid data successfully fetched from June 2025!")

                # Show details of real data
                real_records = [r for r in june_result if r['source'] == 'madrid_opendata_zip']
                if real_records:
                    sample = real_records[0]
                    print(f"📋 Real data sample:")
                    print(f" Date: {sample['date']}")
                    print(f" Traffic volume: {sample['traffic_volume']}")
                    print(f" Measurement point: {sample['measurement_point_id']}")
                    print(f" Point name: {sample.get('measurement_point_name', 'N/A')}")
            else:
                print(f"ℹ️ June data is synthetic (real ZIP may not be accessible)")

    async def test_madrid_zip_month_code(self, client):
        """Test the month code calculation for Madrid ZIP files"""
        print(f"\n=== Madrid ZIP Month Code Test ===")

        # Test the month code calculation function.
        # (year, month, expected Madrid catalogue code)
        test_cases = [
            (2025, 6, 145),  # Known: June 2025 = 145
            (2025, 5, 144),  # Known: May 2025 = 144
            (2025, 4, 143),  # Known: April 2025 = 143
            (2025, 7, 146),  # Predicted: July 2025 = 146
        ]

        for year, month, expected_code in test_cases:
            # The helper is optional on the client, hence the hasattr guard;
            # mismatches are only reported, not asserted.
            if hasattr(client, '_calculate_madrid_month_code'):
                calculated_code = client._calculate_madrid_month_code(year, month)
                status = "✅" if calculated_code == expected_code else "⚠️"
                print(f"{status} {year}-{month:02d}: Expected {expected_code}, Got {calculated_code}")

                # Generate ZIP URL
                if calculated_code:
                    zip_url = f"https://datos.madrid.es/egob/catalogo/208627-{calculated_code}-transporte-ptomedida-historico.zip"
                    print(f" ZIP URL: {zip_url}")
            else:
                print(f"⚠️ Month code calculation function not available")

    async def test_edge_case_large_date_range(self, client, madrid_coords):
        """Test edge case: date range too large"""
        lat, lon = madrid_coords
        start_time = datetime(2025, 1, 1)  # 6+ months range
        end_time = datetime(2025, 7, 1)

        print(f"\n=== Edge Case: Large Date Range ===")
        print(f"Testing 6-month range: {start_time.date()} to {end_time.date()}")

        result = await client.get_historical_traffic(lat, lon, start_time, end_time)

        print(f"📊 Records for 6-month range: {len(result)}")

        # Should return empty list for ranges > 90 days
        assert len(result) == 0, "Should return empty list for date ranges > 90 days"
        print(f"✅ Correctly handled large date range")

    async def test_edge_case_invalid_coordinates(self, client):
        """Test edge case: invalid coordinates"""
        print(f"\n=== Edge Case: Invalid Coordinates ===")

        start_time = datetime(2025, 6, 1)
        end_time = datetime(2025, 6, 1, 6, 0)

        # Test with invalid coordinates (way outside valid lat/lon bounds)
        result = await client.get_historical_traffic(999.0, 999.0, start_time, end_time)

        print(f"📊 Records for invalid coords: {len(result)}")

        # Should either return empty list or synthetic data.
        # The function should not crash.
        assert isinstance(result, list), "Should return list even with invalid coords"
        print(f"✅ Handled invalid coordinates gracefully")

    async def test_real_madrid_zip_access(self, client):
        """Test if we can access the actual Madrid ZIP files"""
        print(f"\n=== Real Madrid ZIP Access Test ===")

        # Test the known ZIP URLs you provided
        test_urls = [
            "https://datos.madrid.es/egob/catalogo/208627-145-transporte-ptomedida-historico.zip",  # June 2025
            "https://datos.madrid.es/egob/catalogo/208627-144-transporte-ptomedida-historico.zip",  # May 2025
            "https://datos.madrid.es/egob/catalogo/208627-143-transporte-ptomedida-historico.zip",  # April 2025
        ]

        for i, url in enumerate(test_urls):
            month_name = ["June 2025", "May 2025", "April 2025"][i]
            print(f"\nTesting {month_name}: {url}")

            try:
                # The fetch helper is optional on the client, hence the guard.
                if hasattr(client, '_fetch_historical_zip'):
                    zip_data = await client._fetch_historical_zip(url)
                    if zip_data:
                        print(f"✅ Successfully fetched ZIP: {len(zip_data)} bytes")

                        # Try to inspect ZIP contents
                        try:
                            import zipfile
                            from io import BytesIO

                            with zipfile.ZipFile(BytesIO(zip_data), 'r') as zip_file:
                                files = zip_file.namelist()
                                csv_files = [f for f in files if f.endswith('.csv')]
                                print(f"📁 ZIP contains {len(files)} files, {len(csv_files)} CSV files")

                                if csv_files:
                                    print(f" CSV files: {csv_files[:2]}{'...' if len(csv_files) > 2 else ''}")
                        except Exception as e:
                            print(f"⚠️ Could not inspect ZIP contents: {e}")
                    else:
                        print(f"❌ Failed to fetch ZIP")
                else:
                    print(f"⚠️ ZIP fetch function not available")

            except Exception as e:
                # Network failures are expected in offline environments;
                # this diagnostic test reports rather than fails.
                print(f"❌ Error testing ZIP access: {e}")
|||
|
|
|
|||
|
|
# Additional standalone test functions for manual running
|
|||
|
|
async def run_manual_test():
    """Exercise the Madrid client directly, outside pytest.

    Fetches a four-hour afternoon window (June 15, 2025, 2 PM - 6 PM) and
    dumps the resulting records to stdout so a developer can eyeball them.
    """
    banner = "=" * 60
    print(banner)
    print("MADRID TRAFFIC TEST - JUNE 2025 DATA")
    print(banner)

    traffic_client = MadridOpenDataClient()
    center_lat, center_lon = 40.4168, -3.7038  # Madrid city centre

    # June 2025 is the last month with published historical data.
    window_start = datetime(2025, 6, 15, 14, 0)  # June 15, 2025 at 2 PM
    window_end = datetime(2025, 6, 15, 18, 0)    # Until 6 PM (4 hours)

    print(f"\nTesting June 15, 2025 data (2 PM - 6 PM)...")
    print(f"This should include afternoon traffic patterns")

    records = await traffic_client.get_historical_traffic(
        center_lat, center_lon, window_start, window_end
    )

    print(f"Result: {len(records)} records")

    if records:
        origins = {rec['source'] for rec in records}
        print(f"Data sources: {', '.join(origins)}")

        if 'madrid_opendata_zip' in origins:
            print(f"🎉 Successfully got real Madrid data!")

        first = records[0]
        print(f"\nSample record:")
        for field, val in first.items():
            # Render the timestamp explicitly; everything else prints as-is.
            rendered = val.strftime('%Y-%m-%d %H:%M:%S') if field == "date" else val
            print(f" {field}: {rendered}")

    print(f"\n✅ Manual test completed!")
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # If run directly (not via pytest), execute the manual smoke test
    # on a fresh asyncio event loop.
    asyncio.run(run_manual_test())
|