Fix data fetch 7

This commit is contained in:
Urtzi Alfaro
2025-07-27 22:58:18 +02:00
parent 0201b428e5
commit 946015b80c
8 changed files with 138 additions and 100 deletions

View File

@@ -15,7 +15,7 @@ Features:
import math
import xml.etree.ElementTree as ET
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import structlog
import re
from dataclasses import dataclass
@@ -480,7 +480,19 @@ class MadridOpenDataClient(BaseAPIClient):
# Parse date
record_date = self._parse_madrid_date(row.get('fecha', '').strip().strip('"'))
if not record_date or not (start_date <= record_date <= end_date):
if not record_date:
return None
# ✅ CRITICAL FIX: Ensure both dates are timezone-aware for comparison
if start_date.tzinfo is None:
start_date = start_date.replace(tzinfo=timezone.utc)
if end_date.tzinfo is None:
end_date = end_date.replace(tzinfo=timezone.utc)
if record_date.tzinfo is None:
record_date = record_date.replace(tzinfo=timezone.utc)
# Now we can safely compare timezone-aware datetimes
if not (start_date <= record_date <= end_date):
return None
# Parse traffic data
@@ -749,15 +761,21 @@ class MadridOpenDataClient(BaseAPIClient):
return int(base * multiplier)
def _parse_madrid_date(self, fecha_str: str) -> Optional[datetime]:
"""Parse Madrid date format"""
"""Parse Madrid date format with timezone awareness"""
if not fecha_str:
return None
try:
return datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
# Parse the date as timezone-naive first
dt = datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
# Convert to timezone-aware (assume Madrid/UTC timezone)
return dt.replace(tzinfo=timezone.utc)
except ValueError:
try:
return datetime.strptime(fecha_str, '%d/%m/%Y %H:%M:%S')
# Try alternative format
dt = datetime.strptime(fecha_str, '%d/%m/%Y %H:%M:%S')
# Convert to timezone-aware (assume Madrid/UTC timezone)
return dt.replace(tzinfo=timezone.utc)
except ValueError:
return None

View File

@@ -13,6 +13,8 @@ from app.models.traffic import TrafficData
from app.external.madrid_opendata import MadridOpenDataClient
from app.schemas.external import TrafficDataResponse
import uuid
logger = structlog.get_logger()
class TrafficService:
@@ -102,26 +104,26 @@ class TrafficService:
try:
for data in traffic_data:
traffic_record = TrafficData(
id = id,
location_id = location_id,
date = data.get('date', datetime.now()),
traffic_volume = data.get('traffic_volume'),
pedestrian_count = data.get('pedestrian_count'),
congestion_level = data.get('congestion_level'),
average_speed = data.get('average_speed'),
source = "Madrid Open Data",
raw_data = str(data),
created_at = data.get('created_at'),
location_id=location_id,
date=data.get('date', datetime.now()),
traffic_volume=data.get('traffic_volume'),
pedestrian_count=data.get('pedestrian_count'),
congestion_level=data.get('congestion_level'),
average_speed=data.get('average_speed'),
source="madrid_opendata",
raw_data=str(data),
created_at=datetime.now()
)
db.add(traffic_record)
await db.commit()
logger.debug("Historical data stored in database", count=len(traffic_record))
logger.debug("Historical data stored in database", count=len(traffic_data))
except Exception as db_error:
logger.warning("Failed to store historical data in database", error=str(db_error))
await db.rollback()
return [TrafficDataResponse(**item) for item in traffic_data]
return [TrafficDataResponse(**item) for item in traffic_record]
else:
logger.warning("No historical traffic data received")
return []

View File

@@ -0,0 +1,49 @@
# ================================================================
# services/data/migrations/versions/20250727_add_timezone_to_datetime_columns.py
# ================================================================
"""Add timezone support to datetime columns
Revision ID: 20250727_193000
Revises:
Create Date: 2025-07-27 19:30:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '20250727_193000'
down_revision = None # Replace with actual previous revision if exists
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Convert TIMESTAMP WITHOUT TIME ZONE to TIMESTAMP WITH TIME ZONE"""
    # Every (table, column) pair holding a naive timestamp today; existing
    # values are reinterpreted as UTC via AT TIME ZONE 'UTC'.
    tz_columns = [
        ("weather_data", "date"),
        ("weather_data", "created_at"),
        ("weather_forecasts", "forecast_date"),
        ("weather_forecasts", "generated_at"),
        ("traffic_data", "date"),
        ("traffic_data", "created_at"),
    ]
    for table, column in tz_columns:
        op.execute(
            f"ALTER TABLE {table} ALTER COLUMN {column} "
            f"TYPE TIMESTAMP WITH TIME ZONE USING {column} AT TIME ZONE 'UTC'"
        )
def downgrade() -> None:
    """Convert TIMESTAMP WITH TIME ZONE back to TIMESTAMP WITHOUT TIME ZONE"""
    # Mirror of upgrade(): strip timezone info, rendering values as UTC.
    tz_columns = [
        ("weather_data", "date"),
        ("weather_data", "created_at"),
        ("weather_forecasts", "forecast_date"),
        ("weather_forecasts", "generated_at"),
        ("traffic_data", "date"),
        ("traffic_data", "created_at"),
    ]
    for table, column in tz_columns:
        op.execute(
            f"ALTER TABLE {table} ALTER COLUMN {column} "
            f"TYPE TIMESTAMP WITHOUT TIME ZONE USING {column} AT TIME ZONE 'UTC'"
        )

View File

@@ -153,7 +153,8 @@ async def get_training_job(
job_id: str,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
training_service: TrainingService = Depends(get_training_service)
training_service: TrainingService = Depends(get_training_service),
db: AsyncSession = Depends(get_db_session)
):
"""Get specific training job details"""
try:
@@ -164,17 +165,24 @@ async def get_training_job(
job_id=job_id,
tenant_id=tenant_id_str)
job = await training_service.get_training_job(job_id)
job_log = await training_service.get_job_status(db, job_id, tenant_id_str)
# Verify tenant access
if job.tenant_id != tenant_id:
if job_log.tenant_id != tenant_id:
logger.warning("Unauthorized job access attempt",
job_id=job_id,
tenant_id=str(tenant_id),
job_tenant_id=job.tenant_id)
raise HTTPException(status_code=404, detail="Job not found")
return job
return TrainingJobResponse(
job_id=job_log.job_id,
status=TrainingStatus(job_log.status),
message=_generate_status_message(job_log.status, job_log.current_step),
tenant_id=str(job_log.tenant_id),
created_at=job_log.start_time,
estimated_duration_minutes=_estimate_duration(job_log.status, job_log.progress)
)
except HTTPException:
raise
@@ -501,4 +509,31 @@ async def retrain_all_products(
logger.error("Failed to start retraining",
error=str(e),
tenant_id=tenant_id)
raise HTTPException(status_code=500, detail=f"Failed to start retraining: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to start retraining: {str(e)}")
def _generate_status_message(status: str, current_step: str) -> str:
"""Generate appropriate status message"""
status_messages = {
"pending": "Training job is queued",
"running": f"Training in progress: {current_step}",
"completed": "Training completed successfully",
"failed": "Training failed",
"cancelled": "Training was cancelled"
}
return status_messages.get(status, f"Status: {status}")
def _estimate_duration(status: str, progress: int) -> int:
"""Estimate remaining duration in minutes"""
if status == "completed":
return 0
elif status == "failed" or status == "cancelled":
return 0
elif status == "pending":
return 30 # Default estimate
else: # running
if progress > 0:
# Rough estimate based on progress
remaining_progress = 100 - progress
return max(1, int((remaining_progress / max(progress, 1)) * 10))
else:
return 25 # Default for running jobs

View File

@@ -11,7 +11,7 @@ class DataServiceClient:
def __init__(self):
    """Initialize the client with the gateway base URL and request timeout.

    The read timeout was raised from 30.0s to 2000.0s because historical
    data fetches through the gateway can be very slow; per-request
    httpx.Timeout objects still bound connect/write/pool separately.
    """
    self.base_url = settings.API_GATEWAY_URL
    # Read timeout in seconds for data-service requests.
    self.timeout = 2000.0
async def fetch_sales_data(
self,
@@ -196,7 +196,8 @@ class DataServiceClient:
logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
# Make POST request via gateway
async with httpx.AsyncClient(timeout=self.timeout) as client:
timeout_config = httpx.Timeout(connect=30.0, read=self.timeout, write=30.0, pool=30.0)
async with httpx.AsyncClient(timeout=timeout_config) as client:
response = await client.post(
f"{self.base_url}/api/v1/tenants/{tenant_id}/traffic/historical",
headers=headers,

View File

@@ -535,79 +535,6 @@ class TrainingService:
logger.error(f"Failed to update job status: {str(e)}")
await db.rollback()
async def _fetch_sales_data(self,
                            tenant_id: str,
                            request: Any,
                            limit: Optional[int] = None) -> List[Dict]:
    """Fetch sales data from data service"""
    try:
        # Build query parameters from the optional request window and limit.
        params = {}
        start = getattr(request, 'start_date', None)
        if start:
            params["start_date"] = start.isoformat()
        end = getattr(request, 'end_date', None)
        if end:
            params["end_date"] = end.isoformat()
        if limit:
            params["limit"] = limit
        headers = {"X-Tenant-ID": tenant_id}
        # Call data service to get sales data
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{settings.DATA_SERVICE_URL}/api/v1/tenants/{tenant_id}/sales",
                params=params,
                headers=headers,
                timeout=30.0
            )
            if response.status_code != 200:
                logger.error(f"Failed to fetch sales data: {response.status_code}")
                return []
            return response.json().get("sales", [])
    except Exception as e:
        logger.error(f"Error fetching sales data: {str(e)}")
        return []
async def _fetch_product_sales_data(self,
                                    tenant_id: str,
                                    product_name: str,
                                    request: Any) -> List[Dict]:
    """Fetch sales data for a specific product"""
    try:
        params = {
            "tenant_id": tenant_id,
            "product_name": product_name
        }
        # Add the optional date window when the request carries it.
        for attr in ("start_date", "end_date"):
            value = getattr(request, attr, None)
            if value:
                params[attr] = value.isoformat()
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{settings.DATA_SERVICE_URL}/api/sales/product/{product_name}",
                params=params,
                timeout=30.0
            )
            if response.status_code != 200:
                logger.error(f"Failed to fetch product sales data: {response.status_code}")
                return []
            return response.json().get("sales", [])
    except Exception as e:
        logger.error(f"Error fetching product sales data: {str(e)}")
        return []
async def _store_trained_models(self,
db: AsyncSession,
tenant_id: str,