REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Generate realistic one-year bakery sales data for AI model training
Creates daily sales data with proper patterns, seasonality, and realistic variations
Pure Python - no external dependencies
"""
import csv
import random
from datetime import datetime, timedelta
from math import sqrt
# Seed the module-level RNG with a fixed value so that every run of this
# script draws the same sequence of random numbers (reproducible output)
random.seed(42)
# Product catalogue, keyed by product name. Each entry carries the baseline
# daily quantity, the unit price, and the multipliers applied to the baseline
# on weekends and on holidays.
PRODUCTS = {
    'pan': dict(
        base_qty=200, price=1.20, weekend_factor=0.85, holiday_factor=1.30,
    ),
    'croissant': dict(
        base_qty=110, price=1.50, weekend_factor=1.20, holiday_factor=1.25,
    ),
    'napolitana': dict(
        base_qty=75, price=1.80, weekend_factor=1.15, holiday_factor=1.20,
    ),
    'palmera': dict(
        base_qty=50, price=1.60, weekend_factor=1.25, holiday_factor=1.15,
    ),
    'cafe': dict(
        base_qty=280, price=1.40, weekend_factor=0.75, holiday_factor=0.90,
    ),
}
# Spanish national holidays as ISO 'YYYY-MM-DD' strings, in chronological order.
# The dataset generated by this script starts on 2024-09-01, so the holidays of
# late 2024 must be listed as well as those of 2025; otherwise the first four
# months of data never receive the holiday adjustment.
HOLIDAYS = [
    '2024-10-12',  # National Day
    '2024-11-01',  # All Saints' Day
    '2024-12-06',  # Constitution Day
    '2024-12-08',  # Immaculate Conception
    '2024-12-25',  # Christmas Day
    '2025-01-01',  # New Year's Day
    '2025-01-06',  # Epiphany (Three Kings)
    '2025-04-18',  # Good Friday
    '2025-05-01',  # Labour Day
    '2025-08-15',  # Assumption
    '2025-10-12',  # National Day
    '2025-11-01',  # All Saints' Day
    '2025-12-06',  # Constitution Day
    '2025-12-08',  # Immaculate Conception
    '2025-12-25',  # Christmas Day
]
def random_normal(mean=0, std=1):
    """Draw one sample from a normal distribution using the Box-Muller transform.

    Two uniform samples are taken from the module-level ``random`` generator
    and combined as ``sqrt(-2 * ln(u1)) * cos(2 * pi * u2)``, which yields a
    standard-normal value that is then scaled and shifted.

    Args:
        mean: Mean of the target distribution.
        std: Standard deviation of the target distribution.

    Returns:
        A float (or ``mean`` plus a float) distributed as N(mean, std**2).
    """
    # Local import: at file level only `sqrt` is imported from math
    import math

    # random.random() lies in [0.0, 1.0); flipping it gives (0.0, 1.0] so that
    # log() is never called with 0
    u1 = 1.0 - random.random()
    u2 = random.random()
    z0 = math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)
    return mean + z0 * std
def get_temperature(date):
    """Return a simulated temperature for Madrid on the given date.

    The value is the monthly baseline for ``date.month`` plus a uniform random
    offset in [-4, 4], floored at 0 and rounded to one decimal place.
    Units are presumably degrees Celsius (not stated in the code).

    Args:
        date: Object exposing a ``month`` attribute in 1..12.

    Returns:
        The simulated temperature as a float with one decimal.
    """
    # Baselines for January through December, keyed by month number
    monthly_baseline = dict(
        zip(range(1, 13), (8, 10, 13, 16, 20, 26, 30, 30, 25, 18, 12, 9))
    )
    baseline = monthly_baseline[date.month]
    offset = random.uniform(-4, 4)
    simulated = baseline + offset
    if simulated < 0:
        simulated = 0
    return round(simulated, 1)
def get_precipitation(date, temperature):
    """Return simulated precipitation in mm for the given date.

    A single random draw is compared with the rain probability of the month
    (higher in the cooler months, lowest in July and August). On a rainy day
    the amount is drawn uniformly from [2, 25] and rounded to one decimal.

    Args:
        date: Object exposing a ``month`` attribute in 1..12.
        temperature: Accepted for interface compatibility; not used.

    Returns:
        The integer 0 on a dry day, otherwise the rain amount as a float.
    """
    month = date.month
    # Chance of rain for January through December, keyed by month number
    rain_chance = dict(
        zip(
            range(1, 13),
            (0.25, 0.25, 0.20, 0.25, 0.20, 0.10,
             0.05, 0.05, 0.15, 0.20, 0.25, 0.25),
        )
    )
    roll = random.random()
    if roll >= rain_chance[month]:
        # Dry day
        return 0
    millimetres = random.uniform(2, 25)
    return round(millimetres, 1)
def calculate_quantity(product_name, product_info, date, is_weekend, is_holiday, temperature, precipitation):
    """Calculate a simulated quantity sold for one product on one day.

    Starting from the product's base quantity, the following multipliers are
    applied in order: weekend, holiday, season (by month), temperature,
    precipitation and day of week. A random factor in [0.85, 1.15] is then
    applied, the result is truncated to an int and raised to the per-product
    minimum. Finally, with 5% probability the day becomes a "low-sales day"
    and the quantity is scaled by a random factor in [0.3, 0.6]; this may
    take it below the minimum.

    Args:
        product_name: Product key. 'cafe' uses the coffee temperature rule,
            every other name uses the pastry rule. Must be a key of the
            per-product minimum table below.
        product_info: Mapping providing 'base_qty', 'weekend_factor' and
            'holiday_factor'.
        date: Object exposing ``month`` and ``weekday()``.
        is_weekend: Whether to apply the product's weekend factor.
        is_holiday: Whether to apply the product's holiday factor.
        temperature: Temperature used by the temperature rule.
        precipitation: Rain amount in mm.

    Returns:
        The quantity sold as an int.

    Raises:
        KeyError: If ``product_name`` has no entry in the minimum table or
            ``product_info`` lacks a required key.
    """
    base = product_info['base_qty']

    # Weekend adjustment
    if is_weekend:
        base *= product_info['weekend_factor']

    # Holiday adjustment
    if is_holiday:
        base *= product_info['holiday_factor']

    # Seasonal adjustment
    month = date.month
    if month in (12, 1):  # Christmas/New Year boost
        base *= 1.15
    elif month in (7, 8):  # Summer vacation dip
        base *= 0.90
    elif month in (4, 5, 9, 10):  # Spring/Fall moderate
        base *= 1.05

    # Temperature effect
    if product_name == 'cafe':
        # More coffee when cold, less when hot
        if temperature < 12:
            base *= 1.15
        elif temperature > 28:
            base *= 0.85
    else:
        # Pastries sell better in moderate weather
        if 15 <= temperature <= 25:
            base *= 1.05
        elif temperature > 30:
            base *= 0.90

    # Precipitation effect. The heavier threshold must be tested first:
    # checking "> 5" before "> 15" would make the heavy-rain branch
    # unreachable, because every value above 15 is also above 5.
    if precipitation > 15:
        base *= 0.75
    elif precipitation > 5:
        base *= 0.85

    # Day of week pattern, indexed by date.weekday() (Monday=0 .. Sunday=6)
    day_factors = [0.95, 1.00, 1.05, 1.00, 1.10, 1.15, 1.05]
    base *= day_factors[date.weekday()]

    # Add random variation (±15%)
    variation = random.uniform(0.85, 1.15)
    quantity = int(base * variation)

    # Ensure minimum sales
    min_qty = {
        'pan': 80, 'croissant': 40, 'napolitana': 30,
        'palmera': 20, 'cafe': 100
    }
    quantity = max(min_qty[product_name], quantity)

    # Occasional low-sales days (5% chance); applied after the minimum on
    # purpose, so these days can fall below it
    if random.random() < 0.05:
        quantity = int(quantity * random.uniform(0.3, 0.6))

    return quantity
def generate_dataset():
    """Build the bakery sales dataset and print a summary of it.

    Walks every day from 2024-09-01 through 2025-09-01 (both included). For
    each day it draws one temperature and one precipitation value, then asks
    calculate_quantity() for the quantity of every product in PRODUCTS and
    derives the revenue from the product price. Running totals per product
    are kept and printed once all days have been generated.

    Returns:
        A list of dicts with the keys 'date', 'product_name',
        'quantity_sold' and 'revenue', ordered by day and then by product.
    """
    start_date = datetime(2024, 9, 1)
    end_date = datetime(2025, 9, 1)
    # Both endpoints are part of the range, hence the +1
    total_days = (end_date - start_date).days + 1

    print("Generating one year of bakery sales data...")
    print(f"Date range: {start_date.date()} to {end_date.date()}")
    print(f"Products: {list(PRODUCTS)}")

    # Running per-product statistics
    product_stats = {}
    for name in PRODUCTS:
        product_stats[name] = {
            'total': 0, 'min': float('inf'), 'max': 0, 'count': 0, 'zeros': 0,
        }

    records = []
    for day_offset in range(total_days):
        day = start_date + timedelta(days=day_offset)
        iso_day = day.strftime('%Y-%m-%d')

        # Date properties
        weekend = day.weekday() >= 5  # Saturday=5, Sunday=6
        holiday = iso_day in HOLIDAYS

        # Environmental factors, drawn once per day and shared by all products
        temperature = get_temperature(day)
        precipitation = get_precipitation(day, temperature)

        for name, info in PRODUCTS.items():
            sold = calculate_quantity(
                name, info, day, weekend, holiday, temperature, precipitation
            )
            records.append({
                'date': iso_day,
                'product_name': name,
                'quantity_sold': sold,
                'revenue': round(sold * info['price'], 2),
            })

            # Update statistics
            tally = product_stats[name]
            tally['total'] += sold
            tally['count'] += 1
            if sold < tally['min']:
                tally['min'] = sold
            if sold > tally['max']:
                tally['max'] = sold
            if sold == 0:
                tally['zeros'] += 1

    # Print statistics
    print("\nDataset generated successfully!")
    print(f"Total records: {len(records)}")
    print(f"Days: {total_days}")
    print(f"Products: {len(PRODUCTS)}")
    print("\nSales statistics by product:")
    for name, tally in product_stats.items():
        if tally['count'] > 0:
            avg = tally['total'] / tally['count']
            zero_pct = tally['zeros'] / tally['count'] * 100
        else:
            avg = 0
            zero_pct = 0
        print(f" {name}:")
        print(f" Total sold: {tally['total']:,}")
        print(f" Avg daily: {avg:.1f}")
        print(f" Min daily: {tally['min']}")
        print(f" Max daily: {tally['max']}")
        print(f" Zero days: {tally['zeros']} ({zero_pct:.1f}%)")

    return records
if __name__ == '__main__':
    # Script entry point: generate the dataset, write it to a CSV file and
    # print the first and last ten records.
    # Local imports: only the entry point needs these modules
    import sys
    from pathlib import Path

    # Generate dataset
    records = generate_dataset()

    # Output path: the first command-line argument if given, otherwise a file
    # in the current user's Downloads folder. Path.home() replaces a path that
    # was hard-coded to one specific user's home directory.
    if len(sys.argv) > 1:
        output_file = Path(sys.argv[1])
    else:
        output_file = Path.home() / 'Downloads' / 'bakery_data_2025_complete.csv'
    # Make sure the target folder exists before opening the file
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Save to CSV; newline='' leaves line-ending handling to the csv module
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['date', 'product_name', 'quantity_sold', 'revenue']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(records)
    print(f"\nDataset saved to: {output_file}")

    # Show sample
    print("\nFirst 10 records:")
    for record in records[:10]:
        print(f" {record}")
    print("\nLast 10 records:")
    for record in records[-10:]:
        print(f" {record}")