#!/bin/bash

# =================================================================
# IMPROVED ONBOARDING FLOW SIMULATION TEST SCRIPT
# =================================================================
# This script simulates the complete onboarding process using the
# real CSV data and proper import/validate endpoints

# Configuration
API_BASE="http://localhost:8000"
TEST_EMAIL="onboarding.test.$(date +%s)@bakery.com"
TEST_PASSWORD="TestPassword123!"
TEST_NAME="Test Bakery Owner"
REAL_CSV_FILE="bakery_sales_2023_2024.csv"
WS_BASE="ws://localhost:8002/api/v1/ws"
WS_TEST_DURATION=2000  # maximum seconds to listen for WebSocket messages (safety timeout)
WS_PID=""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Icons for steps
STEP_ICONS=("👤" "🏪" "📊" "🤖" "🎉")

echo -e "${CYAN}🧪 IMPROVED ONBOARDING FLOW SIMULATION TEST${NC}"
echo -e "${CYAN}==============================================${NC}"
echo "Testing complete user journey through onboarding process"
echo "Using full CSV dataset: $REAL_CSV_FILE"
echo "Test User: $TEST_EMAIL"
echo ""

# Utility functions
log_step() {
    echo -e "${BLUE}📋 $1${NC}"
}

log_success() {
    echo -e "${GREEN}✅ $1${NC}"
}

log_error() {
    echo -e "${RED}❌ $1${NC}"
}

log_warning() {
    echo -e "${YELLOW}⚠️ $1${NC}"
}

check_response() {
    local response="$1"
    local step_name="$2"

    # Check for common error patterns
    if echo "$response" | grep -q '"detail"' && echo "$response" | grep -q '"error"'; then
        log_error "$step_name FAILED"
        echo "Error details: $response"
        return 1
    elif echo "$response" | grep -q '500 Internal Server Error'; then
        log_error "$step_name FAILED - Server Error"
        echo "Response: $response"
        return 1
    elif echo "$response" | grep -q '"status".*"error"'; then
        log_error "$step_name FAILED"
        echo "Response: $response"
        return 1
    elif echo "$response" | grep -q '"detail".*\['; then
        # This catches Pydantic validation errors (array of error objects)
        log_error "$step_name FAILED - Validation Error"
        echo "Response: $response"
        return 1
    else
        log_success "$step_name PASSED"
        return 0
    fi
}
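# Example (hypothetical payload): check_response treats a response whose
# "detail" field is followed by an array as a Pydantic validation error:
#   {"detail": [{"loc": ["body", "email"], "msg": "field required"}]}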

# New function specifically for validation responses
check_validation_response() {
    local response="$1"
    local http_code="$2"
    local step_name="$3"

    # Check HTTP status first
    if [ "$http_code" != "200" ]; then
        log_error "$step_name FAILED - HTTP $http_code"
        echo "Response: $response"
        return 1
    fi

    # Check for validation-specific success indicators
    if echo "$response" | grep -q '"is_valid".*true'; then
        log_success "$step_name PASSED"
        return 0
    elif echo "$response" | grep -q '"is_valid".*false'; then
        log_warning "$step_name FAILED - Validation errors found"
        return 1
    else
        # Fall back to generic error checking
        check_response "$response" "$step_name"
        return $?
    fi
}

extract_json_field() {
    local response="$1"
    local field="$2"

    # Create a temporary file for the JSON to avoid shell escaping issues
    local temp_file="/tmp/json_response_$$.json"
    echo "$response" > "$temp_file"

    python3 -c "
import json
try:
    with open('$temp_file', 'r') as f:
        data = json.load(f)
    # Support dotted paths such as 'training_results.total_products'
    value = data
    for part in '$field'.split('.'):
        value = value.get(part, '')
        if value == '':
            break
    print(value)
except Exception:
    print('')
" 2>/dev/null || echo ""

    # Clean up
    rm -f "$temp_file"
}
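# Usage sketch: nested fields can be addressed with dotted paths. With jq
# installed the equivalent would be:
#   extract_json_field "$RESPONSE" "training_results.total_products"
#   echo "$RESPONSE" | jq -r '.training_results.total_products // empty'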

# Function to escape CSV content for JSON
escape_csv_for_json() {
    local csv_file="$1"
    # Use Python to properly escape for JSON to avoid sed issues
    python3 -c "
import json

# Read the CSV file
with open('$csv_file', 'r', encoding='utf-8') as f:
    content = f.read()

# Escape for JSON (this handles newlines, quotes, and control characters properly)
escaped = json.dumps(content)[1:-1]  # Remove the surrounding quotes that json.dumps adds
print(escaped)
"
}
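# Usage sketch: embed a CSV file as a JSON string field. The payload layout
# mirrors the validation request built later in this script:
#   CSV_JSON=$(escape_csv_for_json "$REAL_CSV_FILE")
#   printf '{"data": "%s", "data_format": "csv"}' "$CSV_JSON" > /tmp/payload.json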

# Function to check for timezone-related errors
check_timezone_error() {
    local response="$1"
    if echo "$response" | grep -q "Cannot convert tz-naive Timestamp"; then
        return 0 # Found timezone error
    fi
    return 1 # No timezone error
}
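# A minimal sketch of the timezone-aware parsing fix referenced in the
# summary section (assumes pandas; the real _parse_date() may differ):
#   python3 -c "import pandas as pd; print(pd.Timestamp('2024-01-01').tz_localize('UTC'))"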

test_websocket_with_nodejs_builtin() {
    local tenant_id="$1"
    local job_id="$2"
    local max_duration="$3"  # Maximum time to wait (fallback)

    echo "Using Node.js with built-in modules for WebSocket testing..."
    echo "Will monitor until job completion or ${max_duration}s timeout"

    # Create ENHANCED Node.js WebSocket test script
    local ws_test_script="/tmp/websocket_test_$job_id.js"
    cat > "$ws_test_script" << 'EOF'
// ENHANCED WebSocket test - waits for job completion
const https = require('https');
const http = require('http');
const crypto = require('crypto');

const tenantId = process.argv[2];
const jobId = process.argv[3];
const maxDuration = parseInt(process.argv[4]) * 1000; // Convert to milliseconds
const accessToken = process.argv[5];
const wsUrl = process.argv[6];

console.log(`🚀 Starting enhanced WebSocket monitoring`);
console.log(`Connecting to: ${wsUrl}`);
console.log(`Will wait for job completion (max ${maxDuration/1000}s)`);

// Parse WebSocket URL
const url = new URL(wsUrl);
const isSecure = url.protocol === 'wss:';
const port = url.port || (isSecure ? 443 : 80);

// Create WebSocket key
const key = crypto.randomBytes(16).toString('base64');
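// Note (illustrative, RFC 6455): a spec-compliant client would verify the
// server's Sec-WebSocket-Accept = base64(SHA-1(key + GUID)). A sketch:
//   const expected = crypto.createHash('sha1')
//     .update(key + '258EAFA5-E914-47DA-95CA-C5AB0DC85B11')
//     .digest('base64');
//   // compare against res.headers['sec-websocket-accept'] in the upgrade handler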

// WebSocket handshake headers
const headers = {
    'Upgrade': 'websocket',
    'Connection': 'Upgrade',
    'Sec-WebSocket-Key': key,
    'Sec-WebSocket-Version': '13',
    'Authorization': `Bearer ${accessToken}`
};

const options = {
    hostname: url.hostname,
    port: port,
    path: url.pathname,
    method: 'GET',
    headers: headers
};

console.log(`Attempting WebSocket handshake to ${url.hostname}:${port}${url.pathname}`);

const client = isSecure ? https : http;
let messageCount = 0;
let jobCompleted = false;
let lastProgressUpdate = Date.now();
let highestProgress = 0;

// Enhanced job tracking
const jobStats = {
    startTime: Date.now(),
    progressUpdates: 0,
    stepsCompleted: [],
    productsProcessed: [],
    errors: []
};

const req = client.request(options);

req.on('upgrade', (res, socket, head) => {
    console.log('✅ WebSocket handshake successful');
    console.log('📡 Monitoring training progress...\n');

    let buffer = Buffer.alloc(0);

    socket.on('data', (data) => {
        buffer = Buffer.concat([buffer, data]);

        // WebSocket frame parsing
        while (buffer.length >= 2) {
            const firstByte = buffer[0];
            const secondByte = buffer[1];

            const fin = (firstByte & 0x80) === 0x80;
            const opcode = firstByte & 0x0F;
            const masked = (secondByte & 0x80) === 0x80;
            let payloadLength = secondByte & 0x7F;

            let offset = 2;

            // Handle extended payload length
            if (payloadLength === 126) {
                if (buffer.length < offset + 2) break;
                payloadLength = buffer.readUInt16BE(offset);
                offset += 2;
            } else if (payloadLength === 127) {
                if (buffer.length < offset + 8) break;
                const high = buffer.readUInt32BE(offset);
                const low = buffer.readUInt32BE(offset + 4);
                if (high !== 0) {
                    console.log('⚠️ Large payload detected, skipping...');
                    buffer = buffer.slice(offset + 8);
                    continue;
                }
                payloadLength = low;
                offset += 8;
            }

            // Check if we have the complete frame
            if (buffer.length < offset + payloadLength) {
                break; // Wait for more data
            }

            // Extract payload
            const payload = buffer.slice(offset, offset + payloadLength);
            buffer = buffer.slice(offset + payloadLength);

            // Handle different frame types
            if (opcode === 1 && fin) { // Text frame
                messageCount++;
                lastProgressUpdate = Date.now();
                const timestamp = new Date().toLocaleTimeString();

                try {
                    const messageText = payload.toString('utf8');
                    const message = JSON.parse(messageText);

                    // Enhanced message processing
                    processTrainingMessage(message, timestamp);

                } catch (e) {
                    const rawText = payload.toString('utf8');
                    console.log(`[${timestamp}] ⚠️ Raw message: ${rawText.substring(0, 200)}${rawText.length > 200 ? '...' : ''}`);
                }

            } else if (opcode === 8) { // Close frame
                console.log('🔌 WebSocket closed by server');
                socket.end();
                return;

            } else if (opcode === 9) { // Ping frame
                // Send pong response
                const pongFrame = Buffer.concat([
                    Buffer.from([0x8A, payload.length]),
                    payload
                ]);
                socket.write(pongFrame);

            } else if (opcode === 10) { // Pong frame
                // Ignore pong responses
                continue;
            }
        }
    });
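
    // For reference, the frame header layout parsed above (RFC 6455 §5.2):
    //   byte 0: FIN(1) RSV(3) opcode(4)
    //   byte 1: MASK(1) payload-len(7); 126 => 2 extra length bytes, 127 => 8
    // Server-to-client frames are never masked, so the `masked` bit is read
    // but no unmasking step is needed here.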

    // Enhanced message processing function
    function processTrainingMessage(message, timestamp) {
        const messageType = message.type || 'unknown';
        const data = message.data || {};

        console.log(`[${timestamp}] 📨 Message ${messageCount}: ${messageType.toUpperCase()}`);

        // Track job statistics
        if (messageType === 'progress') {
            jobStats.progressUpdates++;
            const progress = data.progress || 0;
            const step = data.current_step || 'Unknown step';
            const product = data.current_product;

            // Update highest progress
            if (progress > highestProgress) {
                highestProgress = progress;
            }

            // Track steps
            if (step && !jobStats.stepsCompleted.includes(step)) {
                jobStats.stepsCompleted.push(step);
            }

            // Track products
            if (product && !jobStats.productsProcessed.includes(product)) {
                jobStats.productsProcessed.push(product);
            }

            // Display progress with enhanced formatting
            console.log(`   📊 Progress: ${progress}% (${step})`);
            if (product) {
                console.log(`   🍞 Product: ${product}`);
            }
            if (data.products_completed && data.products_total) {
                console.log(`   📦 Products: ${data.products_completed}/${data.products_total} completed`);
            }
            if (data.estimated_time_remaining_minutes) {
                console.log(`   ⏱️ ETA: ${data.estimated_time_remaining_minutes} minutes`);
            }

        } else if (messageType === 'completed') {
            jobCompleted = true;
            const duration = Math.round((Date.now() - jobStats.startTime) / 1000);

            console.log(`\n🎉 TRAINING COMPLETED SUCCESSFULLY!`);
            console.log(`   ⏱️ Total Duration: ${duration}s`);

            if (data.results) {
                const results = data.results;
                if (results.successful_trainings !== undefined) {
                    console.log(`   ✅ Models Trained: ${results.successful_trainings}`);
                }
                if (results.total_products !== undefined) {
                    console.log(`   📦 Total Products: ${results.total_products}`);
                }
                if (results.success_rate !== undefined) {
                    console.log(`   📈 Success Rate: ${results.success_rate}%`);
                }
            }

            // Close connection after completion
            setTimeout(() => {
                console.log('\n📊 Training job completed - closing WebSocket connection');
                socket.end();
            }, 2000); // Wait 2 seconds to ensure all final messages are received

        } else if (messageType === 'failed') {
            jobCompleted = true;
            jobStats.errors.push(data);

            console.log(`\n❌ TRAINING FAILED!`);
            if (data.error) {
                console.log(`   💥 Error: ${data.error}`);
            }
            if (data.error_details) {
                console.log(`   📝 Details: ${JSON.stringify(data.error_details, null, 2)}`);
            }

            // Close connection after failure
            setTimeout(() => {
                console.log('\n📊 Training job failed - closing WebSocket connection');
                socket.end();
            }, 2000);

        } else if (messageType === 'step_completed') {
            console.log(`   ✅ Step completed: ${data.step_name || 'Unknown'}`);

        } else if (messageType === 'product_started') {
            console.log(`   🚀 Started training: ${data.product_name || 'Unknown product'}`);

        } else if (messageType === 'product_completed') {
            console.log(`   ✅ Product completed: ${data.product_name || 'Unknown product'}`);
            if (data.metrics) {
                console.log(`   📊 Metrics: ${JSON.stringify(data.metrics, null, 2)}`);
            }
        }

        console.log(''); // Add spacing between messages
    }
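
    // Illustrative message shape this handler expects (field names are
    // inferred from the reads above, not taken from a server spec):
    //   {"type": "progress", "data": {"progress": 42,
    //    "current_step": "feature_engineering", "current_product": "baguette",
    //    "products_completed": 3, "products_total": 12}}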

    socket.on('end', () => {
        const duration = Math.round((Date.now() - jobStats.startTime) / 1000);

        console.log(`\n📊 WebSocket connection ended`);
        console.log(`📨 Total messages received: ${messageCount}`);
        console.log(`⏱️ Connection duration: ${duration}s`);
        console.log(`📈 Highest progress reached: ${highestProgress}%`);

        if (jobCompleted) {
            console.log('✅ Job completed successfully - connection closed normally');
            process.exit(0);
        } else {
            console.log('⚠️ Connection ended before job completion');
            console.log(`📊 Progress reached: ${highestProgress}%`);
            console.log(`📋 Steps completed: ${jobStats.stepsCompleted.length}`);
            process.exit(1);
        }
    });

    socket.on('error', (error) => {
        console.log(`❌ WebSocket error: ${error.message}`);
        process.exit(1);
    });

    // Enhanced ping mechanism - send pings more frequently
    const pingInterval = setInterval(() => {
        if (socket.writable && !jobCompleted) {
            try {
                const pingFrame = Buffer.from([0x89, 0x00]);
                socket.write(pingFrame);
            } catch (e) {
                // Ignore ping errors
            }
        }
    }, 5000); // Ping every 5 seconds

    // Heartbeat check - ensure we're still receiving messages
    const heartbeatInterval = setInterval(() => {
        if (!jobCompleted) {
            const timeSinceLastMessage = Date.now() - lastProgressUpdate;

            if (timeSinceLastMessage > 60000) { // 60 seconds without messages
                console.log('\n⚠️ No messages received for 60 seconds');
                console.log('   This could indicate the training is stuck or connection issues');
                console.log(`   Last progress: ${highestProgress}%`);
            } else if (timeSinceLastMessage > 30000) { // 30 seconds warning
                console.log(`\n💤 Quiet period: ${Math.round(timeSinceLastMessage/1000)}s since last update`);
                console.log('   (This is normal during intensive training phases)');
            }
        }
    }, 15000); // Check every 15 seconds

    // Safety timeout - close connection if max duration exceeded
    const safetyTimeout = setTimeout(() => {
        if (!jobCompleted) {
            clearInterval(pingInterval);
            clearInterval(heartbeatInterval);

            console.log(`\n⏰ Maximum duration (${maxDuration/1000}s) reached`);
            console.log(`📊 Final status:`);
            console.log(`   📨 Messages received: ${messageCount}`);
            console.log(`   📈 Progress reached: ${highestProgress}%`);
            console.log(`   📋 Steps completed: ${jobStats.stepsCompleted.length}`);
            console.log(`   🍞 Products processed: ${jobStats.productsProcessed.length}`);

            if (messageCount > 0) {
                console.log('\n✅ WebSocket communication was successful!');
                console.log('   Training may still be running - check server logs for completion');
            } else {
                console.log('\n⚠️ No messages received during monitoring period');
            }

            socket.end();
        }
    }, maxDuration);

    // Clean up intervals when job completes
    socket.on('end', () => {
        clearInterval(pingInterval);
        clearInterval(heartbeatInterval);
        clearTimeout(safetyTimeout);
    });
});

req.on('response', (res) => {
    console.log(`❌ HTTP response instead of WebSocket upgrade: ${res.statusCode}`);
    console.log('Response headers:', res.headers);

    let body = '';
    res.on('data', chunk => body += chunk);
    res.on('end', () => {
        if (body) console.log('Response body:', body);
        process.exit(1);
    });
});

req.on('error', (error) => {
    console.log(`❌ Connection error: ${error.message}`);
    process.exit(1);
});

req.end();
EOF

    # Run the ENHANCED Node.js WebSocket test
    local ws_url="$WS_BASE/tenants/$tenant_id/training/jobs/$job_id/live"
    echo "Starting enhanced WebSocket monitoring..."
    node "$ws_test_script" "$tenant_id" "$job_id" "$max_duration" "$ACCESS_TOKEN" "$ws_url"
    local exit_code=$?

    # Clean up
    rm -f "$ws_test_script"

    if [ $exit_code -eq 0 ]; then
        log_success "Training job completed successfully!"
        echo "   📡 WebSocket monitoring detected job completion"
        echo "   🎉 Real-time progress tracking worked perfectly"
    else
        log_warning "WebSocket monitoring ended before job completion"
        echo "   📊 Check the progress logs above for details"
    fi

    return $exit_code
}

install_websocat_if_needed() {
    if ! command -v websocat >/dev/null 2>&1; then
        echo "📦 Installing websocat for better WebSocket testing..."

        # Try to install websocat (works on most Linux systems)
        if command -v cargo >/dev/null 2>&1; then
            cargo install websocat 2>/dev/null || true
        elif [ -x "$(command -v wget)" ]; then
            wget -q -O /tmp/websocat "https://github.com/vi/websocat/releases/latest/download/websocat.x86_64-unknown-linux-musl" 2>/dev/null || true
            if [ -f /tmp/websocat ]; then
                chmod +x /tmp/websocat
                sudo mv /tmp/websocat /usr/local/bin/ 2>/dev/null || mv /tmp/websocat ~/bin/ 2>/dev/null || true
            fi
        fi

        if command -v websocat >/dev/null 2>&1; then
            log_success "websocat installed successfully"
            return 0
        else
            log_warning "websocat installation failed, using Node.js fallback"
            return 1
        fi
    fi
    return 0
}

# IMPROVED: WebSocket connection function with better tool selection
test_websocket_connection() {
    local tenant_id="$1"
    local job_id="$2"
    local duration="$3"

    log_step "4.2. Connecting to WebSocket for real-time progress monitoring"

    echo "WebSocket URL: $WS_BASE/tenants/$tenant_id/training/jobs/$job_id/live"
    echo "Test duration: ${duration}s"
    echo ""

    # Try to install websocat if not available
    if install_websocat_if_needed; then
        test_websocket_with_websocat "$tenant_id" "$job_id" "$duration"
    elif command -v node >/dev/null 2>&1; then
        test_websocket_with_nodejs_builtin "$tenant_id" "$job_id" "$duration"
    else
        # curl cannot complete a WebSocket upgrade handshake, so skip
        # rather than call an unavailable client
        log_warning "Neither websocat nor node is available - skipping WebSocket test"
        return 1
    fi
}
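# Usage sketch (values are illustrative):
#   test_websocket_connection "$TENANT_ID" "$JOB_ID" 300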

# Test WebSocket using websocat (recommended)
test_websocket_with_websocat() {
    local tenant_id="$1"
    local job_id="$2"
    local duration="$3"

    echo "Using websocat for WebSocket testing..."

    # Create a temporary file for WebSocket messages
    local ws_log="/tmp/websocket_messages_$job_id.log"

    # Start WebSocket connection in background
    (
        echo "Connecting to WebSocket..."
        timeout "${duration}s" websocat "$WS_BASE/tenants/$tenant_id/training/jobs/$job_id/live" \
            --header "Authorization: Bearer $ACCESS_TOKEN" 2>&1 | \
        while IFS= read -r line; do
            echo "$(date '+%H:%M:%S') | $line" | tee -a "$ws_log"
        done
    ) &

    WS_PID=$!

    # Send a one-off ping on a second connection to nudge the endpoint
    sleep 2
    if kill -0 $WS_PID 2>/dev/null; then
        echo "ping" | websocat "$WS_BASE/tenants/$tenant_id/training/jobs/$job_id/live" \
            --header "Authorization: Bearer $ACCESS_TOKEN" >/dev/null 2>&1 &
    fi

    # Wait for test duration
    log_step "4.2.1. Listening for WebSocket messages (${duration}s)..."
    wait_for_websocket_messages "$ws_log" "$duration"

    # Clean up
    if kill -0 $WS_PID 2>/dev/null; then
        kill $WS_PID 2>/dev/null
        wait $WS_PID 2>/dev/null
    fi
}

# Wait for WebSocket messages and analyze them
wait_for_websocket_messages() {
    local ws_log="$1"
    local duration="$2"
    local start_time=$(date +%s)
    local end_time=$((start_time + duration))

    echo "📡 Monitoring WebSocket messages..."
    echo "Log file: $ws_log"

    # Show real-time progress
    while [ $(date +%s) -lt $end_time ]; do
        if [ -f "$ws_log" ]; then
            local message_count=$(wc -l < "$ws_log" 2>/dev/null || echo "0")
            local elapsed=$(($(date +%s) - start_time))
            printf "\r⏱️ Elapsed: ${elapsed}s | Messages: $message_count"
        fi
        sleep 1
    done

    echo ""

    # Analyze received messages
    if [ -f "$ws_log" ] && [ -s "$ws_log" ]; then
        local total_messages=$(wc -l < "$ws_log")
        log_success "WebSocket test completed - received $total_messages messages"

        echo ""
        echo "📊 Message Analysis:"

        # Show message types
        if grep -q "progress" "$ws_log"; then
            local progress_count=$(grep -c "progress" "$ws_log")
            echo "   📈 Progress updates: $progress_count"
        fi

        if grep -q "completed" "$ws_log"; then
            echo "   ✅ Completion messages: $(grep -c "completed" "$ws_log")"
        fi

        if grep -q "failed\|error" "$ws_log"; then
            echo "   ❌ Error messages: $(grep -c "failed\|error" "$ws_log")"
        fi

        echo ""
        echo "📝 Recent messages (last 5):"
        tail -5 "$ws_log" | sed 's/^/   /'

    else
        log_warning "No WebSocket messages received during test period"
        echo "   This could mean:"
        echo "   • Training completed before WebSocket connection was established"
        echo "   • WebSocket endpoint is not working correctly"
        echo "   • Authentication issues with WebSocket connection"
        echo "   • Training service is not publishing progress events"
    fi

    # Clean up log file
    rm -f "$ws_log"
}

# Enhanced training step with WebSocket testing
enhanced_training_step_with_completion_check() {
    echo -e "${STEP_ICONS[3]} ${PURPLE}STEP 4: MODEL TRAINING WITH SMART WEBSOCKET MONITORING${NC}"
    echo "Enhanced training step with completion-aware progress monitoring"
    echo ""

    log_step "4.1. Initiating model training with FULL dataset"

    # Start training job
    TRAINING_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$API_BASE/api/v1/tenants/$TENANT_ID/training/jobs" \
        -H "Authorization: Bearer $ACCESS_TOKEN" \
        -H "Content-Type: application/json" \
        -d '{}')

    # Extract HTTP code and response
    HTTP_CODE=$(echo "$TRAINING_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
    TRAINING_RESPONSE=$(echo "$TRAINING_RESPONSE" | sed '/HTTP_CODE:/d')

    echo "Training HTTP Status Code: $HTTP_CODE"
    echo "Training Response:"
    echo "$TRAINING_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$TRAINING_RESPONSE"

    if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
        # Extract training job details
        TRAINING_TASK_ID=$(extract_json_field "$TRAINING_RESPONSE" "task_id")
        JOB_ID=$(extract_json_field "$TRAINING_RESPONSE" "job_id")
        JOB_STATUS=$(extract_json_field "$TRAINING_RESPONSE" "status")

        # Use job_id if available, otherwise use task_id
        WEBSOCKET_JOB_ID="${JOB_ID:-$TRAINING_TASK_ID}"

        if [ -n "$WEBSOCKET_JOB_ID" ]; then
            log_success "Training job started successfully"
            echo "   Job ID: $WEBSOCKET_JOB_ID"
            echo "   Status: $JOB_STATUS"

            # Determine monitoring strategy based on initial status
            if [ "$JOB_STATUS" = "completed" ]; then
                log_warning "Training completed instantly - no real-time progress to monitor"
                echo "   This can happen when:"
                echo "   • Models are already trained and cached"
                echo "   • No valid products found in sales data"
                echo "   • Training data is insufficient"

                # Show training results
                TOTAL_PRODUCTS=$(extract_json_field "$TRAINING_RESPONSE" "training_results.total_products")
                SUCCESSFUL_TRAININGS=$(extract_json_field "$TRAINING_RESPONSE" "training_results.successful_trainings")
                SALES_RECORDS=$(extract_json_field "$TRAINING_RESPONSE" "data_summary.sales_records")

                echo ""
                echo "📊 Training Summary:"
                echo "   Sales records: $SALES_RECORDS"
                echo "   Products found: $TOTAL_PRODUCTS"
                echo "   Successful trainings: $SUCCESSFUL_TRAININGS"

                # Brief WebSocket connection test
                log_step "4.2. Testing WebSocket endpoint (demonstration mode)"
                echo "Testing WebSocket connection for 10 seconds..."
                test_websocket_with_nodejs_builtin "$TENANT_ID" "$WEBSOCKET_JOB_ID" "10"

            else
                # Training is in progress - use smart monitoring
                log_step "4.2. Starting smart WebSocket monitoring"
                echo "   Strategy: Monitor until job completion"
                echo "   Maximum wait time: ${WS_TEST_DURATION}s (safety timeout)"
                echo "   Will automatically close when training completes"
                echo ""

                # Use enhanced monitoring with longer timeout for real training
                local SMART_DURATION=$WS_TEST_DURATION

                # Estimate duration based on data size (optional enhancement)
                if [ -n "$SALES_RECORDS" ] && [ "$SALES_RECORDS" -gt 1000 ]; then
                    # For large datasets, extend timeout
                    SMART_DURATION=$((WS_TEST_DURATION * 2))
                    echo "   📊 Large dataset detected ($SALES_RECORDS records)"
                    echo "   🕐 Extended timeout to ${SMART_DURATION}s for thorough training"
                fi

                test_websocket_with_nodejs_builtin "$TENANT_ID" "$WEBSOCKET_JOB_ID" "$SMART_DURATION"
            fi

        else
            log_warning "Training started but couldn't extract job ID for WebSocket testing"
            echo "Response: $TRAINING_RESPONSE"
        fi
    else
        log_error "Training job failed to start (HTTP $HTTP_CODE)"
        echo "Response: $TRAINING_RESPONSE"
    fi

    echo ""
}
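# Illustrative response shape for POST .../training/jobs (field names are
# inferred from the extractions above, not from a published API contract):
#   {"job_id": "abc123", "task_id": "abc123", "status": "running",
#    "training_results": {"total_products": 12, "successful_trainings": 11},
#    "data_summary": {"sales_records": 25000}}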

# =================================================================
# PRE-FLIGHT CHECKS
# =================================================================

echo -e "${PURPLE}🔍 Pre-flight checks...${NC}"

# Check if services are running
if ! curl -s "$API_BASE/health" > /dev/null; then
    log_error "API Gateway is not responding at $API_BASE"
    echo "Please ensure services are running: docker-compose up -d"
    exit 1
fi

log_success "API Gateway is responding"

# Check if CSV file exists
if [ ! -f "$REAL_CSV_FILE" ]; then
    log_error "Real CSV file not found: $REAL_CSV_FILE"
    echo "Please ensure the CSV file is in the current directory"
    exit 1
fi

log_success "Real CSV file found: $REAL_CSV_FILE"

# Show CSV file info - FULL DATASET
echo "CSV file info (FULL DATASET):"
echo "   Lines: $(wc -l < "$REAL_CSV_FILE")"
echo "   Size: $(du -h "$REAL_CSV_FILE" | cut -f1)"
echo "   Header: $(head -1 "$REAL_CSV_FILE")"

# Check individual services
services_check() {
    local service_ports=("8001:Auth" "8002:Training" "8003:Data" "8005:Tenant")
    for service in "${service_ports[@]}"; do
        IFS=':' read -r port name <<< "$service"
        if curl -s "http://localhost:$port/health" > /dev/null; then
            echo "   ✓ $name Service (port $port)"
        else
            log_warning "$name Service not responding on port $port"
        fi
    done
}

services_check
echo ""

# =================================================================
# STEP 1: USER REGISTRATION (ONBOARDING PAGE STEP 1)
# =================================================================

echo -e "${STEP_ICONS[0]} ${PURPLE}STEP 1: USER REGISTRATION${NC}"
echo "Simulating onboarding page step 1 - 'Crear Cuenta'"
echo ""

log_step "1.1. Registering new user account"
echo "Email: $TEST_EMAIL"
echo "Full Name: $TEST_NAME"
echo "Password: [HIDDEN]"

REGISTER_RESPONSE=$(curl -s -X POST "$API_BASE/api/v1/auth/register" \
    -H "Content-Type: application/json" \
    -d "{
        \"email\": \"$TEST_EMAIL\",
        \"password\": \"$TEST_PASSWORD\",
        \"full_name\": \"$TEST_NAME\"
    }")

echo "Registration Response:"
echo "$REGISTER_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$REGISTER_RESPONSE"

if check_response "$REGISTER_RESPONSE" "User Registration"; then
    USER_ID=$(extract_json_field "$REGISTER_RESPONSE" "id")
    if [ -n "$USER_ID" ]; then
        log_success "User ID extracted: $USER_ID"
    fi
else
    echo "Full response: $REGISTER_RESPONSE"
    exit 1
fi

echo ""

# =================================================================
# STEP 1.5: USER LOGIN (AUTOMATIC AFTER REGISTRATION)
# =================================================================

log_step "1.5. Logging in to get access token"

LOGIN_RESPONSE=$(curl -s -X POST "$API_BASE/api/v1/auth/login" \
    -H "Content-Type: application/json" \
    -d "{
        \"email\": \"$TEST_EMAIL\",
        \"password\": \"$TEST_PASSWORD\"
    }")

echo "Login Response:"
echo "$LOGIN_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$LOGIN_RESPONSE"

if check_response "$LOGIN_RESPONSE" "User Login"; then
    ACCESS_TOKEN=$(extract_json_field "$LOGIN_RESPONSE" "access_token")
    if [ -n "$ACCESS_TOKEN" ]; then
        log_success "Access token obtained"
    else
        log_error "Failed to extract access token"
        exit 1
    fi
else
    echo "Full response: $LOGIN_RESPONSE"
    exit 1
fi

echo ""

# =================================================================
# STEP 2: BAKERY REGISTRATION (ONBOARDING PAGE STEP 2)
# =================================================================

echo -e "${STEP_ICONS[1]} ${PURPLE}STEP 2: BAKERY REGISTRATION${NC}"
echo "Simulating onboarding page step 2 - 'Datos de Panadería'"
echo ""

log_step "2.1. Registering bakery/tenant with mock coordinates"

# Mock coordinates for Madrid locations (since geolocation service is not running)
# These are real Madrid coordinates for testing weather and traffic data acquisition
MADRID_COORDS=(
    "40.4168:-3.7038"   # Sol (city center)
    "40.4378:-3.6795"   # Retiro area
    "40.4093:-3.6936"   # Atocha area
    "40.4517:-3.6847"   # Chamberí area
    "40.3897:-3.6774"   # Delicias area
)

# Select random coordinates from Madrid locations
SELECTED_COORDS=${MADRID_COORDS[$((RANDOM % ${#MADRID_COORDS[@]}))]}
IFS=':' read -r MOCK_LATITUDE MOCK_LONGITUDE <<< "$SELECTED_COORDS"

echo "Using mock coordinates for Madrid:"
echo "   Latitude: $MOCK_LATITUDE"
echo "   Longitude: $MOCK_LONGITUDE"
echo "   (This simulates the address-to-coordinates conversion service)"

# Using the exact BakeryRegistration schema; the mock coordinates are not
# part of this payload - they are stored separately for the training step
BAKERY_DATA="{
    \"name\": \"Panadería Test $(date +%H%M)\",
    \"business_type\": \"bakery\",
    \"address\": \"Calle Gran Vía 123\",
    \"city\": \"Madrid\",
    \"postal_code\": \"28001\",
    \"phone\": \"+34600123456\"
}"

echo "Bakery Data with mock coordinates:"
echo "$BAKERY_DATA" | python3 -m json.tool

BAKERY_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$API_BASE/api/v1/tenants/register" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $ACCESS_TOKEN" \
    -d "$BAKERY_DATA")

# Extract HTTP code and response
HTTP_CODE=$(echo "$BAKERY_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
BAKERY_RESPONSE=$(echo "$BAKERY_RESPONSE" | sed '/HTTP_CODE:/d')

echo "HTTP Status Code: $HTTP_CODE"
echo "Bakery Registration Response:"
echo "$BAKERY_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$BAKERY_RESPONSE"

if check_response "$BAKERY_RESPONSE" "Bakery Registration"; then
    TENANT_ID=$(extract_json_field "$BAKERY_RESPONSE" "id")
    if [ -n "$TENANT_ID" ]; then
        log_success "Tenant ID extracted: $TENANT_ID"
        log_success "Mock coordinates will be used for weather/traffic data: ($MOCK_LATITUDE, $MOCK_LONGITUDE)"

        # Store coordinates for later use in training
        echo "BAKERY_LATITUDE=$MOCK_LATITUDE" > /tmp/bakery_coords.env
        echo "BAKERY_LONGITUDE=$MOCK_LONGITUDE" >> /tmp/bakery_coords.env
        echo "TENANT_ID=$TENANT_ID" >> /tmp/bakery_coords.env

        log_step "2.2. Testing weather data acquisition with mock coordinates"
        # Test if weather service can use these coordinates
        WEATHER_TEST_RESPONSE=$(curl -s -X GET "$API_BASE/api/v1/training/$TENANT_ID/weather/current?latitude=$MOCK_LATITUDE&longitude=$MOCK_LONGITUDE" \
            -H "Authorization: Bearer $ACCESS_TOKEN" \
            -H "X-Tenant-ID: $TENANT_ID" 2>/dev/null || echo '{"status":"service_unavailable"}')

        if echo "$WEATHER_TEST_RESPONSE" | grep -q '"temperature"\|"weather"'; then
            log_success "Weather service can use mock coordinates"
        else
            log_warning "Weather service test skipped (coordinates stored for training)"
        fi

        log_step "2.3. Testing traffic data acquisition with mock coordinates"
        # Test if traffic service can use these coordinates
        TRAFFIC_TEST_RESPONSE=$(curl -s -X GET "$API_BASE/api/v1/training/$TENANT_ID/traffic/current?latitude=$MOCK_LATITUDE&longitude=$MOCK_LONGITUDE" \
            -H "Authorization: Bearer $ACCESS_TOKEN" \
            -H "X-Tenant-ID: $TENANT_ID" 2>/dev/null || echo '{"status":"service_unavailable"}')

        if echo "$TRAFFIC_TEST_RESPONSE" | grep -q '"traffic_volume"\|"intensity"'; then
            log_success "Traffic service can use mock coordinates"
        else
            log_warning "Traffic service test skipped (coordinates stored for training)"
        fi

    else
        log_error "Failed to extract tenant ID"
        exit 1
    fi
else
    echo "Full response: $BAKERY_RESPONSE"
    exit 1
fi

echo ""

# =================================================================
# STEP 3: SALES DATA UPLOAD (ONBOARDING PAGE STEP 3)
# =================================================================

echo -e "${STEP_ICONS[2]} ${PURPLE}STEP 3: SALES DATA UPLOAD${NC}"
echo "Simulating onboarding page step 3 - 'Historial de Ventas'"
echo ""

log_step "3.1. Validating full sales data format"

# Read and escape CSV content for JSON using Python for reliability
log_step "3.1.1. Preparing FULL CSV data for JSON transmission"

CSV_CONTENT=$(escape_csv_for_json "$REAL_CSV_FILE")

if [ $? -ne 0 ] || [ -z "$CSV_CONTENT" ]; then
    log_error "Failed to escape CSV content for JSON"
    exit 1
fi

log_success "FULL CSV content escaped successfully (length: ${#CSV_CONTENT} chars)"

# Create validation request using Python for proper JSON formatting
log_step "3.1.2. Creating validation request with FULL dataset"

VALIDATION_DATA_FILE="/tmp/validation_request.json"
python3 -c "
import json

# Read the FULL CSV content
with open('$REAL_CSV_FILE', 'r', encoding='utf-8') as f:
    csv_content = f.read()

# Create proper JSON request
request_data = {
    'data': csv_content,
    'data_format': 'csv',
    'validate_only': True,
    'source': 'onboarding_upload'
}

# Write to file
with open('$VALIDATION_DATA_FILE', 'w', encoding='utf-8') as f:
    json.dump(request_data, f, ensure_ascii=False, indent=2)

print('Validation request file created successfully')
"

if [ ! -f "$VALIDATION_DATA_FILE" ]; then
    log_error "Failed to create validation request file"
    exit 1
fi

echo "Validation request (first 200 chars):"
head -c 200 "$VALIDATION_DATA_FILE"
echo "..."

VALIDATION_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$API_BASE/api/v1/tenants/$TENANT_ID/sales/import/validate" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $ACCESS_TOKEN" \
    -d @"$VALIDATION_DATA_FILE")

# Extract HTTP code and response
HTTP_CODE=$(echo "$VALIDATION_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
VALIDATION_RESPONSE=$(echo "$VALIDATION_RESPONSE" | sed '/HTTP_CODE:/d')

echo "HTTP Status Code: $HTTP_CODE"
echo "Validation Response:"
echo "$VALIDATION_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$VALIDATION_RESPONSE"

# Parse validation results using the SalesValidationResult schema
IS_VALID=$(extract_json_field "$VALIDATION_RESPONSE" "is_valid")
TOTAL_RECORDS=$(extract_json_field "$VALIDATION_RESPONSE" "total_records")
VALID_RECORDS=$(extract_json_field "$VALIDATION_RESPONSE" "valid_records")
INVALID_RECORDS=$(extract_json_field "$VALIDATION_RESPONSE" "invalid_records")

if [ "$IS_VALID" = "True" ]; then
    log_success "FULL sales data validation passed"
    echo "   Total records: $TOTAL_RECORDS"
    echo "   Valid records: $VALID_RECORDS"
    echo "   Invalid records: $INVALID_RECORDS"
elif [ "$IS_VALID" = "False" ]; then
    log_error "FULL sales data validation failed"
    echo "   Total records: $TOTAL_RECORDS"
    echo "   Valid records: $VALID_RECORDS"
    echo "   Invalid records: $INVALID_RECORDS"

    # Extract and display errors
    echo "Validation errors:"
    echo "$VALIDATION_RESPONSE" | python3 -c "
import json, sys
try:
    data = json.load(sys.stdin)
    errors = data.get('errors', [])
    for i, err in enumerate(errors[:5]):  # Show first 5 errors
        print(f'   {i+1}. {err.get(\"message\", \"Unknown error\")}')
    if len(errors) > 5:
        print(f'   ... and {len(errors) - 5} more errors')
except:
    print('   Could not parse error details')
" 2>/dev/null

    log_warning "Validation failed, but continuing to test import flow..."
else
    log_warning "Validation response format unexpected, but continuing..."
fi
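
# Illustrative SalesValidationResult shape (inferred from the fields parsed
# above; the actual schema may carry additional fields):
#   {"is_valid": true, "total_records": 730, "valid_records": 730,
#    "invalid_records": 0, "errors": []}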

log_step "3.2. Importing FULL sales data using file upload"

# The import endpoint expects form data (file upload), not JSON
# Use curl's -F flag for multipart/form-data

IMPORT_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$API_BASE/api/v1/tenants/$TENANT_ID/sales/import" \
    -H "Authorization: Bearer $ACCESS_TOKEN" \
    -F "file=@$REAL_CSV_FILE" \
    -F "file_format=csv")

# Extract HTTP code and response
HTTP_CODE=$(echo "$IMPORT_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
IMPORT_RESPONSE=$(echo "$IMPORT_RESPONSE" | sed '/HTTP_CODE:/d')

echo "Import HTTP Status Code: $HTTP_CODE"
echo "Import Response:"
echo "$IMPORT_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$IMPORT_RESPONSE"

# Check for import success using SalesImportResult schema
if [ "$HTTP_CODE" = "200" ]; then
    IMPORT_SUCCESS=$(extract_json_field "$IMPORT_RESPONSE" "success")
    RECORDS_CREATED=$(extract_json_field "$IMPORT_RESPONSE" "records_created")
    RECORDS_FAILED=$(extract_json_field "$IMPORT_RESPONSE" "records_failed")
    RECORDS_PROCESSED=$(extract_json_field "$IMPORT_RESPONSE" "records_processed")
    SUCCESS_RATE=$(extract_json_field "$IMPORT_RESPONSE" "success_rate")

    if [ "$IMPORT_SUCCESS" = "True" ] || [ "$IMPORT_SUCCESS" = "true" ]; then
        log_success "FULL dataset import completed successfully"
        echo "   Records processed: $RECORDS_PROCESSED"
        echo "   Records created: $RECORDS_CREATED"
        echo "   Records failed: $RECORDS_FAILED"
        echo "   Success rate: $SUCCESS_RATE%"
        echo "   Processing time: $(extract_json_field "$IMPORT_RESPONSE" "processing_time_seconds")s"

        if [ "$RECORDS_FAILED" -gt 0 ] 2>/dev/null; then
            log_warning "$RECORDS_FAILED records failed during import"
        fi
    elif [ "$IMPORT_SUCCESS" = "False" ] || [ "$IMPORT_SUCCESS" = "false" ]; then
        log_error "Import reported failure despite HTTP 200"
        echo "Import response: $IMPORT_RESPONSE"
    else
        log_warning "Could not parse import success field (got: '$IMPORT_SUCCESS')"

        # Fallback: if we got HTTP 200 and response contains records data, assume success
        if echo "$IMPORT_RESPONSE" | grep -q '"records_created"\|"records_processed"'; then
            log_success "Import appears successful based on response content"
            FALLBACK_CREATED=$(echo "$IMPORT_RESPONSE" | grep -o '"records_created":[0-9]*' | cut -d: -f2 | head -1)
            FALLBACK_PROCESSED=$(echo "$IMPORT_RESPONSE" | grep -o '"records_processed":[0-9]*' | cut -d: -f2 | head -1)
            echo "   Records processed: $FALLBACK_PROCESSED"
            echo "   Records created: $FALLBACK_CREATED"
        fi
    fi
else
    log_warning "FULL dataset import failed with HTTP $HTTP_CODE, but continuing with test..."

    # Check for timezone error specifically
    if check_timezone_error "$IMPORT_RESPONSE"; then
        log_warning "Detected timezone conversion error - this is a known issue"
        echo "Consider applying timezone fix to data import service"
    fi
fi

echo ""

# =================================================================
# STEP 4: MODEL TRAINING (ONBOARDING PAGE STEP 4)
# =================================================================

# The training step drives the WebSocket monitoring itself (via
# test_websocket_with_nodejs_builtin), so no separate bare
# test_websocket_connection call is needed here
enhanced_training_step_with_completion_check

echo ""

# =================================================================
# STEP 5: ONBOARDING COMPLETION (DASHBOARD ACCESS)
# =================================================================

log_step "5.1. Testing basic dashboard functionality"

# Forecast request with proper schema
# NOTE: forecast_date is hardcoded and may need updating for future runs
FORECAST_REQUEST="{
    \"product_name\": \"pan\",
    \"forecast_date\": \"2025-08-02\",
    \"forecast_days\": 1,
    \"location\": \"madrid_centro\",
    \"confidence_level\": 0.85
}"

echo "Forecast Request:"
echo "$FORECAST_REQUEST" | python3 -m json.tool

# Make the API call
FORECAST_RESPONSE=$(curl -s -w "\nHTTP_CODE:%{http_code}" -X POST "$API_BASE/api/v1/tenants/$TENANT_ID/forecasts/single" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $ACCESS_TOKEN" \
    -d "$FORECAST_REQUEST")

# Extract HTTP code and response
HTTP_CODE=$(echo "$FORECAST_RESPONSE" | grep "HTTP_CODE:" | cut -d: -f2)
FORECAST_RESPONSE=$(echo "$FORECAST_RESPONSE" | sed '/HTTP_CODE:/d')

echo "Forecast HTTP Status: $HTTP_CODE"
echo "Forecast Response:"
echo "$FORECAST_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$FORECAST_RESPONSE"

# Validate response
if [ "$HTTP_CODE" = "200" ]; then
    if echo "$FORECAST_RESPONSE" | grep -q '"predicted_demand"\|"id"'; then
        log_success "Forecasting service is working correctly"

        # Extract key values for validation
        PREDICTED_DEMAND=$(extract_json_field "$FORECAST_RESPONSE" "predicted_demand")
        CONFIDENCE_LOWER=$(extract_json_field "$FORECAST_RESPONSE" "confidence_lower")
        CONFIDENCE_UPPER=$(extract_json_field "$FORECAST_RESPONSE" "confidence_upper")

        if [ -n "$PREDICTED_DEMAND" ]; then
            echo "   Predicted Demand: $PREDICTED_DEMAND"
            echo "   Confidence Range: [$CONFIDENCE_LOWER, $CONFIDENCE_UPPER]"
        fi
    else
        log_error "Forecast response missing expected fields"
        echo "Response: $FORECAST_RESPONSE"
    fi
elif [ "$HTTP_CODE" = "422" ]; then
    log_error "Forecast request validation failed"
    echo "Validation errors: $FORECAST_RESPONSE"
elif [ "$HTTP_CODE" = "404" ]; then
    log_warning "Forecast endpoint not found - check API routing"
elif [ "$HTTP_CODE" = "500" ]; then
    log_error "Internal server error in forecasting service"
    echo "Error details: $FORECAST_RESPONSE"
else
    log_warning "Forecasting may not be ready yet (HTTP $HTTP_CODE)"
    echo "Response: $FORECAST_RESPONSE"
fi

echo ""

# =================================================================
# SUMMARY AND CLEANUP
# =================================================================

echo -e "${CYAN}📊 IMPROVED ONBOARDING FLOW TEST SUMMARY${NC}"
echo -e "${CYAN}=========================================${NC}"

echo ""
echo "✅ Completed Onboarding Steps:"
echo "   ${STEP_ICONS[0]} Step 1: User Registration ✓"
echo "   ${STEP_ICONS[1]} Step 2: Bakery Registration ✓"
echo "   ${STEP_ICONS[2]} Step 3: FULL Sales Data Upload ✓"
echo "   ${STEP_ICONS[3]} Step 4: Model Training with FULL Data ✓"
echo "   ${STEP_ICONS[4]} Step 5: Onboarding Complete ✓"

echo ""
echo "📋 Test Results:"
echo "   User ID: $USER_ID"
echo "   Tenant ID: $TENANT_ID"
echo "   Training Task ID: $TRAINING_TASK_ID"
echo "   Test Email: $TEST_EMAIL"
echo "   FULL CSV Used: $REAL_CSV_FILE"
echo "   Total Records in Dataset: $(wc -l < "$REAL_CSV_FILE" 2>/dev/null || echo "Unknown")"

echo ""
echo "📈 Data Quality:"
if [ -n "$TOTAL_RECORDS" ]; then
    echo "   Total Records Processed: $TOTAL_RECORDS"
    echo "   Valid Records: $VALID_RECORDS"
    echo "   Invalid Records: $INVALID_RECORDS"
    if [ "$TOTAL_RECORDS" -gt 0 ]; then
        VALID_PERCENTAGE=$(python3 -c "print(round(${VALID_RECORDS:-0} / ${TOTAL_RECORDS} * 100, 1))" 2>/dev/null || echo "N/A")
        echo "   Data Quality: $VALID_PERCENTAGE% valid"
    fi
else
    echo "   Data validation metrics not available"
fi

echo ""
echo "🔧 Known Issues Detected:"
if echo "$IMPORT_RESPONSE" | grep -q "Cannot convert tz-naive"; then
    echo "   ❌ TIMEZONE ERROR: CSV dates are timezone-naive"
    echo "      Solution: Apply timezone fix patch to data import service"
    echo "      File: services/data/app/services/data_import_service.py"
    echo "      Method: Replace _parse_date() with timezone-aware version"
fi

echo ""
echo "🧹 Cleanup:"
echo "   To clean up test data, you may want to remove:"
echo "   - Test user: $TEST_EMAIL"
echo "   - Test tenant: $TENANT_ID"

# Cleanup temporary files
rm -f "$VALIDATION_DATA_FILE" /tmp/bakery_coords.env

echo ""
log_success "Improved onboarding flow simulation completed successfully!"
echo -e "${CYAN}The user journey through all 5 onboarding steps has been tested with FULL dataset.${NC}"

# Final status check
if [ -n "$USER_ID" ] && [ -n "$TENANT_ID" ]; then
    echo ""
    echo -e "${GREEN}🎉 All critical onboarding functionality is working!${NC}"
    echo "The user can successfully:"
    echo "   • Register an account"
    echo "   • Set up their bakery"
    echo "   • Upload and validate FULL sales data"
    echo "   • Start model training with FULL dataset"
    echo "   • Access the platform dashboard"

    if [ -n "$VALID_RECORDS" ] && [ "$VALID_RECORDS" -gt 0 ]; then
        echo ""
        echo -e "${GREEN}🏆 BONUS: FULL dataset was successfully processed!${NC}"
        echo "   • $VALID_RECORDS valid sales records imported from FULL dataset"
        echo "   • Model training initiated with all products"
        echo "   • End-to-end data pipeline verified with complete data"
    fi

    exit 0
else
    echo ""
    echo -e "${YELLOW}⚠️ Some issues detected in the onboarding flow${NC}"
    echo "Check the logs above for specific failures"
    exit 1
fi