Fix data fetch 5
This commit is contained in:
@@ -68,7 +68,6 @@ class ServiceAuthenticator:
|
|||||||
def get_request_headers(self, tenant_id: str = None) -> Dict[str, str]:
|
def get_request_headers(self, tenant_id: str = None) -> Dict[str, str]:
|
||||||
"""Get standard headers for service requests"""
|
"""Get standard headers for service requests"""
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
|
||||||
"X-Service": "training-service",
|
"X-Service": "training-service",
|
||||||
"User-Agent": "training-service/1.0.0"
|
"User-Agent": "training-service/1.0.0"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ class DataServiceClient:
|
|||||||
|
|
||||||
# Make request via gateway
|
# Make request via gateway
|
||||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||||
response = await client.post(
|
response = await client.get(
|
||||||
f"{self.base_url}/api/v1/tenants/{tenant_id}/sales",
|
f"{self.base_url}/api/v1/tenants/{tenant_id}/sales",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
params=params
|
params=params
|
||||||
@@ -87,6 +87,7 @@ class DataServiceClient:
|
|||||||
tenant_id=tenant_id)
|
tenant_id=tenant_id)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
async def fetch_weather_data(
|
async def fetch_weather_data(
|
||||||
self,
|
self,
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
@@ -107,14 +108,16 @@ class DataServiceClient:
|
|||||||
headers["Authorization"] = f"Bearer {token}"
|
headers["Authorization"] = f"Bearer {token}"
|
||||||
headers["Content-Type"] = "application/json"
|
headers["Content-Type"] = "application/json"
|
||||||
|
|
||||||
# Prepare request payload
|
# Prepare request payload with proper date handling
|
||||||
payload = {
|
payload = {
|
||||||
"start_date": start_date,
|
"start_date": start_date, # Already in ISO format from calling code
|
||||||
"end_date": end_date,
|
"end_date": end_date, # Already in ISO format from calling code
|
||||||
"latitude": latitude or 40.4168, # Default Madrid coordinates
|
"latitude": latitude or 40.4168, # Default Madrid coordinates
|
||||||
"longitude": longitude or -3.7038
|
"longitude": longitude or -3.7038
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
|
||||||
|
|
||||||
# Make POST request via gateway
|
# Make POST request via gateway
|
||||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
@@ -128,11 +131,36 @@ class DataServiceClient:
|
|||||||
url=response.url)
|
url=response.url)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
data = response.json()
|
||||||
|
logger.info(f"Successfully fetched {len(data)} weather records")
|
||||||
|
return data
|
||||||
|
elif response.status_code == 400:
|
||||||
|
error_details = response.text
|
||||||
|
logger.error(f"Weather API validation error (400): {error_details}")
|
||||||
|
|
||||||
|
# Try to parse the error and provide helpful info
|
||||||
|
try:
|
||||||
|
error_json = response.json()
|
||||||
|
if 'detail' in error_json:
|
||||||
|
detail = error_json['detail']
|
||||||
|
if 'End date must be after start date' in str(detail):
|
||||||
|
logger.error(f"Date range issue: start={start_date}, end={end_date}")
|
||||||
|
elif 'Date range cannot exceed 90 days' in str(detail):
|
||||||
|
logger.error(f"Date range too large: {start_date} to {end_date}")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return []
|
||||||
|
elif response.status_code == 401:
|
||||||
|
logger.error("Authentication failed for weather API")
|
||||||
|
return []
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to fetch weather data: {response.status_code} - {response.text}")
|
logger.error(f"Failed to fetch weather data: {response.status_code} - {response.text}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.error("Timeout when fetching weather data")
|
||||||
|
return []
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error fetching weather data: {str(e)}")
|
logger.error(f"Error fetching weather data: {str(e)}")
|
||||||
return []
|
return []
|
||||||
@@ -159,15 +187,17 @@ class DataServiceClient:
|
|||||||
|
|
||||||
# Prepare request payload
|
# Prepare request payload
|
||||||
payload = {
|
payload = {
|
||||||
"start_date": start_date,
|
"start_date": start_date, # Already in ISO format from calling code
|
||||||
"end_date": end_date,
|
"end_date": end_date, # Already in ISO format from calling code
|
||||||
"latitude": latitude or 40.4168, # Default Madrid coordinates
|
"latitude": latitude or 40.4168, # Default Madrid coordinates
|
||||||
"longitude": longitude or -3.7038
|
"longitude": longitude or -3.7038
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
|
||||||
|
|
||||||
# Make POST request via gateway
|
# Make POST request via gateway
|
||||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||||
response = await client.get(
|
response = await client.post(
|
||||||
f"{self.base_url}/api/v1/tenants/{tenant_id}/traffic/historical",
|
f"{self.base_url}/api/v1/tenants/{tenant_id}/traffic/historical",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=payload
|
json=payload
|
||||||
@@ -178,11 +208,23 @@ class DataServiceClient:
|
|||||||
url=response.url)
|
url=response.url)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()
|
data = response.json()
|
||||||
|
logger.info(f"Successfully fetched {len(data)} traffic records")
|
||||||
|
return data
|
||||||
|
elif response.status_code == 400:
|
||||||
|
error_details = response.text
|
||||||
|
logger.error(f"Traffic API validation error (400): {error_details}")
|
||||||
|
return []
|
||||||
|
elif response.status_code == 401:
|
||||||
|
logger.error("Authentication failed for traffic API")
|
||||||
|
return []
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to fetch traffic data: {response.status_code} - {response.text}")
|
logger.error(f"Failed to fetch traffic data: {response.status_code} - {response.text}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.error("Timeout when fetching traffic data")
|
||||||
|
return []
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error fetching traffic data: {str(e)}")
|
logger.error(f"Error fetching traffic data: {str(e)}")
|
||||||
return []
|
return []
|
||||||
@@ -34,6 +34,75 @@ class TrainingService:
|
|||||||
self.ml_trainer = BakeryMLTrainer()
|
self.ml_trainer = BakeryMLTrainer()
|
||||||
self.data_client = DataServiceClient()
|
self.data_client = DataServiceClient()
|
||||||
|
|
||||||
|
async def _determine_sales_date_range(self, sales_data: List[Dict]) -> tuple[datetime, datetime]:
|
||||||
|
"""Determine start and end dates from sales data with validation"""
|
||||||
|
if not sales_data:
|
||||||
|
raise ValueError("No sales data available to determine date range")
|
||||||
|
|
||||||
|
dates = []
|
||||||
|
for record in sales_data:
|
||||||
|
if 'date' in record:
|
||||||
|
try:
|
||||||
|
if isinstance(record['date'], str):
|
||||||
|
# Handle various date string formats
|
||||||
|
date_str = record['date'].replace('Z', '+00:00')
|
||||||
|
if 'T' in date_str:
|
||||||
|
parsed_date = datetime.fromisoformat(date_str)
|
||||||
|
else:
|
||||||
|
# Handle date-only strings
|
||||||
|
parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
|
||||||
|
dates.append(parsed_date)
|
||||||
|
elif isinstance(record['date'], datetime):
|
||||||
|
dates.append(record['date'])
|
||||||
|
except (ValueError, AttributeError) as e:
|
||||||
|
logger.warning(f"Invalid date format in record: {record['date']} - {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not dates:
|
||||||
|
raise ValueError("No valid dates found in sales data")
|
||||||
|
|
||||||
|
start_date = min(dates)
|
||||||
|
end_date = max(dates)
|
||||||
|
|
||||||
|
# Validate and adjust date range for external APIs
|
||||||
|
start_date, end_date = self._adjust_date_range_for_apis(start_date, end_date)
|
||||||
|
|
||||||
|
logger.info(f"Determined and adjusted sales date range: {start_date} to {end_date}")
|
||||||
|
return start_date, end_date
|
||||||
|
|
||||||
|
def _adjust_date_range_for_apis(self, start_date: datetime, end_date: datetime) -> tuple[datetime, datetime]:
|
||||||
|
"""Adjust date range to comply with external API limits"""
|
||||||
|
|
||||||
|
# Weather and traffic APIs have a 90-day limit
|
||||||
|
MAX_DAYS = 90
|
||||||
|
|
||||||
|
# Calculate current range
|
||||||
|
current_range = (end_date - start_date).days
|
||||||
|
|
||||||
|
if current_range > MAX_DAYS:
|
||||||
|
logger.warning(f"Date range ({current_range} days) exceeds API limit ({MAX_DAYS} days). Adjusting...")
|
||||||
|
|
||||||
|
# Keep the most recent data
|
||||||
|
start_date = end_date - timedelta(days=MAX_DAYS)
|
||||||
|
logger.info(f"Adjusted start_date to {start_date} to fit within {MAX_DAYS} day limit")
|
||||||
|
|
||||||
|
# Ensure dates are not in the future
|
||||||
|
now = datetime.now()
|
||||||
|
if end_date > now:
|
||||||
|
end_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
logger.info(f"Adjusted end_date to {end_date} (cannot be in future)")
|
||||||
|
|
||||||
|
if start_date > now:
|
||||||
|
start_date = now.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=30)
|
||||||
|
logger.info(f"Adjusted start_date to {start_date} (was in future)")
|
||||||
|
|
||||||
|
# Ensure start_date is before end_date
|
||||||
|
if start_date >= end_date:
|
||||||
|
start_date = end_date - timedelta(days=30) # Default to 30 days of data
|
||||||
|
logger.warning(f"start_date was not before end_date. Adjusted start_date to {start_date}")
|
||||||
|
|
||||||
|
return start_date, end_date
|
||||||
|
|
||||||
async def execute_training_job_simple(self, job_id: str, tenant_id_str: str, request: TrainingJobRequest):
|
async def execute_training_job_simple(self, job_id: str, tenant_id_str: str, request: TrainingJobRequest):
|
||||||
"""Simple wrapper that creates its own database session"""
|
"""Simple wrapper that creates its own database session"""
|
||||||
try:
|
try:
|
||||||
@@ -146,23 +215,43 @@ class TrainingService:
|
|||||||
# Determine date range from sales data
|
# Determine date range from sales data
|
||||||
start_date, end_date = await self._determine_sales_date_range(sales_data)
|
start_date, end_date = await self._determine_sales_date_range(sales_data)
|
||||||
|
|
||||||
|
# Convert dates to ISO format strings for API calls
|
||||||
|
start_date_str = start_date.isoformat()
|
||||||
|
end_date_str = end_date.isoformat()
|
||||||
|
|
||||||
|
logger.info(f"Using date range for external APIs: {start_date_str} to {end_date_str}")
|
||||||
|
|
||||||
# Fetch external data if requested using the sales date range
|
# Fetch external data if requested using the sales date range
|
||||||
weather_data = []
|
weather_data = []
|
||||||
traffic_data = []
|
traffic_data = []
|
||||||
|
|
||||||
await self._update_job_status(db, job_id, "running", 15, "Fetching weather data")
|
await self._update_job_status(db, job_id, "running", 15, "Fetching weather data")
|
||||||
weather_data = await self.data_client.fetch_weather_data(
|
try:
|
||||||
tenant_id,
|
weather_data = await self.data_client.fetch_weather_data(
|
||||||
start_date=start_date.isoformat(),
|
tenant_id=tenant_id,
|
||||||
end_date=end_date.isoformat()
|
start_date=start_date_str,
|
||||||
)
|
end_date=end_date_str,
|
||||||
|
latitude=40.4168, # Madrid coordinates
|
||||||
|
longitude=-3.7038
|
||||||
|
)
|
||||||
|
logger.info(f"Fetched {len(weather_data)} weather records")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to fetch weather data: {e}. Continuing without weather data.")
|
||||||
|
weather_data = []
|
||||||
|
|
||||||
await self._update_job_status(db, job_id, "running", 25, "Fetching traffic data")
|
await self._update_job_status(db, job_id, "running", 25, "Fetching traffic data")
|
||||||
traffic_data = await self.data_client.fetch_traffic_data(
|
try:
|
||||||
tenant_id,
|
traffic_data = await self.data_client.fetch_traffic_data(
|
||||||
start_date=start_date.isoformat(),
|
tenant_id=tenant_id,
|
||||||
end_date=end_date.isoformat()
|
start_date=start_date_str,
|
||||||
)
|
end_date=end_date_str,
|
||||||
|
latitude=40.4168,
|
||||||
|
longitude=-3.7038
|
||||||
|
)
|
||||||
|
logger.info(f"Fetched {len(traffic_data)} traffic records")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to fetch traffic data: {e}. Continuing without traffic data.")
|
||||||
|
traffic_data = []
|
||||||
|
|
||||||
# Execute ML training
|
# Execute ML training
|
||||||
await self._update_job_status(db, job_id, "running", 35, "Processing training data")
|
await self._update_job_status(db, job_id, "running", 35, "Processing training data")
|
||||||
|
|||||||
Reference in New Issue
Block a user