REFACTOR - Database logic

Urtzi Alfaro
2025-08-08 09:08:41 +02:00
parent 0154365bfc
commit 488bb3ef93
113 changed files with 22842 additions and 6503 deletions


@@ -0,0 +1,470 @@
"""
Log Repository
Repository for notification log operations
"""
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text, and_
from datetime import datetime, timedelta
import structlog
import json
from .base import NotificationBaseRepository
from app.models.notifications import NotificationLog, NotificationStatus
from shared.database.exceptions import DatabaseError, ValidationError
logger = structlog.get_logger()
class LogRepository(NotificationBaseRepository):
    """Repository for notification log operations"""

    def __init__(self, session: AsyncSession, cache_ttl: Optional[int] = 120):
        # Logs are highly dynamic, so keep the cache TTL very short (2 minutes)
        super().__init__(NotificationLog, session, cache_ttl)

    async def create_log_entry(self, log_data: Dict[str, Any]) -> NotificationLog:
        """Create a new notification log entry"""
        try:
            # Validate log data
            validation_result = self._validate_notification_data(
                log_data,
                ["notification_id", "attempt_number", "status"]
            )
            if not validation_result["is_valid"]:
                raise ValidationError(f"Invalid log data: {validation_result['errors']}")

            # Set default values
            if "attempted_at" not in log_data:
                log_data["attempted_at"] = datetime.utcnow()

            # Serialize metadata if it's a dict
            if "log_metadata" in log_data and isinstance(log_data["log_metadata"], dict):
                log_data["log_metadata"] = json.dumps(log_data["log_metadata"])

            # Serialize provider response if it's a dict
            if "provider_response" in log_data and isinstance(log_data["provider_response"], dict):
                log_data["provider_response"] = json.dumps(log_data["provider_response"])

            # Create log entry
            log_entry = await self.create(log_data)
            logger.debug("Notification log entry created",
                         log_id=log_entry.id,
                         notification_id=log_entry.notification_id,
                         attempt_number=log_entry.attempt_number,
                         status=log_entry.status.value)
            return log_entry
        except ValidationError:
            raise
        except Exception as e:
            logger.error("Failed to create log entry",
                         notification_id=log_data.get("notification_id"),
                         error=str(e))
            raise DatabaseError(f"Failed to create log entry: {str(e)}")

    async def get_logs_for_notification(
        self,
        notification_id: str,
        skip: int = 0,
        limit: int = 50
    ) -> List[NotificationLog]:
        """Get all log entries for a specific notification"""
        try:
            return await self.get_multi(
                filters={"notification_id": notification_id},
                skip=skip,
                limit=limit,
                order_by="attempt_number",
                order_desc=False
            )
        except Exception as e:
            logger.error("Failed to get logs for notification",
                         notification_id=notification_id,
                         error=str(e))
            return []

    async def get_latest_log_for_notification(
        self,
        notification_id: str
    ) -> Optional[NotificationLog]:
        """Get the most recent log entry for a notification"""
        try:
            logs = await self.get_multi(
                filters={"notification_id": notification_id},
                limit=1,
                order_by="attempt_number",
                order_desc=True
            )
            return logs[0] if logs else None
        except Exception as e:
            logger.error("Failed to get latest log for notification",
                         notification_id=notification_id,
                         error=str(e))
            return None
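
    # The analytics helpers below drop to raw SQL because their time-window
    # filters and aggregations go beyond what the base repository's get_multi()
    # exposes. WHERE clauses are assembled from fixed strings only; all values
    # are bound as :named parameters, so no user input is interpolated.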

    async def get_failed_delivery_logs(
        self,
        hours_back: int = 24,
        provider: Optional[str] = None,
        limit: int = 100
    ) -> List[NotificationLog]:
        """Get failed delivery logs for analysis"""
        try:
            cutoff_time = datetime.utcnow() - timedelta(hours=hours_back)
            conditions = [
                "status = 'failed'",
                "attempted_at >= :cutoff_time"
            ]
            params = {"cutoff_time": cutoff_time, "limit": limit}
            if provider:
                conditions.append("provider = :provider")
                params["provider"] = provider

            query_text = f"""
                SELECT * FROM notification_logs
                WHERE {' AND '.join(conditions)}
                ORDER BY attempted_at DESC
                LIMIT :limit
            """
            result = await self.session.execute(text(query_text), params)
            logs = []
            for row in result.fetchall():
                record_dict = dict(row._mapping)
                # Convert enum string back to enum object
                record_dict["status"] = NotificationStatus(record_dict["status"])
                log_entry = self.model(**record_dict)
                logs.append(log_entry)
            return logs
        except Exception as e:
            logger.error("Failed to get failed delivery logs",
                         hours_back=hours_back,
                         provider=provider,
                         error=str(e))
            return []

    async def get_delivery_performance_stats(
        self,
        hours_back: int = 24,
        provider: Optional[str] = None
    ) -> Dict[str, Any]:
        """Get delivery performance statistics"""
        try:
            cutoff_time = datetime.utcnow() - timedelta(hours=hours_back)
            conditions = ["attempted_at >= :cutoff_time"]
            params = {"cutoff_time": cutoff_time}
            if provider:
                conditions.append("provider = :provider")
                params["provider"] = provider
            where_clause = " AND ".join(conditions)

            # Get overall statistics
            stats_query = text(f"""
                SELECT
                    COUNT(*) as total_attempts,
                    COUNT(CASE WHEN status = 'sent' OR status = 'delivered' THEN 1 END) as successful_attempts,
                    COUNT(CASE WHEN status = 'failed' THEN 1 END) as failed_attempts,
                    AVG(response_time_ms) as avg_response_time_ms,
                    MIN(response_time_ms) as min_response_time_ms,
                    MAX(response_time_ms) as max_response_time_ms
                FROM notification_logs
                WHERE {where_clause}
            """)
            result = await self.session.execute(stats_query, params)
            stats = result.fetchone()

            total = stats.total_attempts or 0
            successful = stats.successful_attempts or 0
            failed = stats.failed_attempts or 0
            success_rate = (successful / total * 100) if total > 0 else 0
            failure_rate = (failed / total * 100) if total > 0 else 0

            # Get error breakdown
            error_query = text(f"""
                SELECT error_code, COUNT(*) as count
                FROM notification_logs
                WHERE {where_clause} AND status = 'failed' AND error_code IS NOT NULL
                GROUP BY error_code
                ORDER BY count DESC
                LIMIT 10
            """)
            result = await self.session.execute(error_query, params)
            error_breakdown = {row.error_code: row.count for row in result.fetchall()}

            # Get provider breakdown if not filtering by provider
            provider_breakdown = {}
            if not provider:
                provider_query = text(f"""
                    SELECT provider,
                           COUNT(*) as total,
                           COUNT(CASE WHEN status = 'sent' OR status = 'delivered' THEN 1 END) as successful
                    FROM notification_logs
                    WHERE {where_clause} AND provider IS NOT NULL
                    GROUP BY provider
                    ORDER BY total DESC
                """)
                result = await self.session.execute(provider_query, params)
                for row in result.fetchall():
                    provider_success_rate = (row.successful / row.total * 100) if row.total > 0 else 0
                    provider_breakdown[row.provider] = {
                        "total": row.total,
                        "successful": row.successful,
                        "success_rate_percent": round(provider_success_rate, 2)
                    }

            return {
                "total_attempts": total,
                "successful_attempts": successful,
                "failed_attempts": failed,
                "success_rate_percent": round(success_rate, 2),
                "failure_rate_percent": round(failure_rate, 2),
                "avg_response_time_ms": float(stats.avg_response_time_ms or 0),
                "min_response_time_ms": int(stats.min_response_time_ms or 0),
                "max_response_time_ms": int(stats.max_response_time_ms or 0),
                "error_breakdown": error_breakdown,
                "provider_breakdown": provider_breakdown,
                "hours_analyzed": hours_back
            }
        except Exception as e:
            logger.error("Failed to get delivery performance stats",
                         hours_back=hours_back,
                         provider=provider,
                         error=str(e))
            return {
                "total_attempts": 0,
                "successful_attempts": 0,
                "failed_attempts": 0,
                "success_rate_percent": 0.0,
                "failure_rate_percent": 0.0,
                "avg_response_time_ms": 0.0,
                "min_response_time_ms": 0,
                "max_response_time_ms": 0,
                "error_breakdown": {},
                "provider_breakdown": {},
                "hours_analyzed": hours_back
            }

    async def get_logs_by_provider(
        self,
        provider: str,
        hours_back: int = 24,
        status: Optional[NotificationStatus] = None,
        limit: int = 100
    ) -> List[NotificationLog]:
        """Get logs for a specific provider"""
        try:
            cutoff_time = datetime.utcnow() - timedelta(hours=hours_back)
            conditions = [
                "provider = :provider",
                "attempted_at >= :cutoff_time"
            ]
            params = {"provider": provider, "cutoff_time": cutoff_time, "limit": limit}
            if status:
                conditions.append("status = :status")
                params["status"] = status.value

            query_text = f"""
                SELECT * FROM notification_logs
                WHERE {' AND '.join(conditions)}
                ORDER BY attempted_at DESC
                LIMIT :limit
            """
            result = await self.session.execute(text(query_text), params)
            logs = []
            for row in result.fetchall():
                record_dict = dict(row._mapping)
                # Convert enum string back to enum object
                record_dict["status"] = NotificationStatus(record_dict["status"])
                log_entry = self.model(**record_dict)
                logs.append(log_entry)
            return logs
        except Exception as e:
            logger.error("Failed to get logs by provider",
                         provider=provider,
                         error=str(e))
            return []
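
    # NOTE: the bulk DELETE below runs on the injected session; committing the
    # transaction is assumed to happen in the caller or in the base
    # repository's unit-of-work handling.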

    async def cleanup_old_logs(self, days_old: int = 30) -> int:
        """Clean up old notification logs"""
        try:
            cutoff_date = datetime.utcnow() - timedelta(days=days_old)
            # Only delete logs for successfully delivered or permanently failed notifications
            query_text = """
                DELETE FROM notification_logs
                WHERE attempted_at < :cutoff_date
                AND status IN ('delivered', 'failed')
            """
            result = await self.session.execute(text(query_text), {"cutoff_date": cutoff_date})
            deleted_count = result.rowcount
            logger.info("Cleaned up old notification logs",
                        deleted_count=deleted_count,
                        days_old=days_old)
            return deleted_count
        except Exception as e:
            logger.error("Failed to cleanup old logs", error=str(e))
            raise DatabaseError(f"Cleanup failed: {str(e)}")

    async def get_notification_timeline(
        self,
        notification_id: str
    ) -> Dict[str, Any]:
        """Get complete timeline for a notification including all attempts"""
        try:
            logs = await self.get_logs_for_notification(notification_id)
            timeline = []
            for log in logs:
                entry = {
                    "attempt_number": log.attempt_number,
                    "status": log.status.value,
                    "attempted_at": log.attempted_at.isoformat() if log.attempted_at else None,
                    "provider": log.provider,
                    "provider_message_id": log.provider_message_id,
                    "response_time_ms": log.response_time_ms,
                    "error_code": log.error_code,
                    "error_message": log.error_message
                }
                # Parse metadata if present
                if log.log_metadata:
                    try:
                        entry["metadata"] = json.loads(log.log_metadata)
                    except json.JSONDecodeError:
                        entry["metadata"] = log.log_metadata
                # Parse provider response if present
                if log.provider_response:
                    try:
                        entry["provider_response"] = json.loads(log.provider_response)
                    except json.JSONDecodeError:
                        entry["provider_response"] = log.provider_response
                timeline.append(entry)

            # Calculate summary statistics
            total_attempts = len(logs)
            successful_attempts = len([log for log in logs if log.status in [NotificationStatus.SENT, NotificationStatus.DELIVERED]])
            failed_attempts = len([log for log in logs if log.status == NotificationStatus.FAILED])
            response_times = [log.response_time_ms for log in logs if log.response_time_ms is not None]
            avg_response_time = sum(response_times) / len(response_times) if response_times else 0

            return {
                "notification_id": notification_id,
                "total_attempts": total_attempts,
                "successful_attempts": successful_attempts,
                "failed_attempts": failed_attempts,
                "avg_response_time_ms": round(avg_response_time, 2),
                "timeline": timeline
            }
        except Exception as e:
            logger.error("Failed to get notification timeline",
                         notification_id=notification_id,
                         error=str(e))
            return {
                "notification_id": notification_id,
                "error": str(e),
                "timeline": []
            }

    async def get_retry_analysis(self, days_back: int = 7) -> Dict[str, Any]:
        """Analyze retry patterns and success rates"""
        try:
            cutoff_date = datetime.utcnow() - timedelta(days=days_back)

            # Get retry statistics
            retry_query = text("""
                SELECT
                    attempt_number,
                    COUNT(*) as total_attempts,
                    COUNT(CASE WHEN status = 'sent' OR status = 'delivered' THEN 1 END) as successful_attempts
                FROM notification_logs
                WHERE attempted_at >= :cutoff_date
                GROUP BY attempt_number
                ORDER BY attempt_number
            """)
            result = await self.session.execute(retry_query, {"cutoff_date": cutoff_date})
            retry_stats = {}
            for row in result.fetchall():
                success_rate = (row.successful_attempts / row.total_attempts * 100) if row.total_attempts > 0 else 0
                retry_stats[row.attempt_number] = {
                    "total_attempts": row.total_attempts,
                    "successful_attempts": row.successful_attempts,
                    "success_rate_percent": round(success_rate, 2)
                }

            # Get common failure patterns
            failure_query = text("""
                SELECT
                    error_code,
                    attempt_number,
                    COUNT(*) as count
                FROM notification_logs
                WHERE attempted_at >= :cutoff_date
                AND status = 'failed'
                AND error_code IS NOT NULL
                GROUP BY error_code, attempt_number
                ORDER BY count DESC
                LIMIT 20
            """)
            result = await self.session.execute(failure_query, {"cutoff_date": cutoff_date})
            failure_patterns = []
            for row in result.fetchall():
                failure_patterns.append({
                    "error_code": row.error_code,
                    "attempt_number": row.attempt_number,
                    "count": row.count
                })

            return {
                "retry_statistics": retry_stats,
                "failure_patterns": failure_patterns,
                "days_analyzed": days_back
            }
        except Exception as e:
            logger.error("Failed to get retry analysis", error=str(e))
            return {
                "retry_statistics": {},
                "failure_patterns": [],
                "days_analyzed": days_back,
                "error": str(e)
            }
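
A minimal usage sketch of the new repository. Assumptions are labeled: `async_session_factory`, the notification id, and the field values are illustrative and not part of this commit.

    import asyncio

    async def demo():
        # assumes an async sessionmaker configured elsewhere in the app
        async with async_session_factory() as session:
            repo = LogRepository(session)

            # record a failed delivery attempt (field values are illustrative)
            await repo.create_log_entry({
                "notification_id": "ntf-123",
                "attempt_number": 1,
                "status": NotificationStatus.FAILED,
                "provider": "smtp",
                "error_code": "TIMEOUT",
            })

            # pull recent failures and aggregate performance
            failures = await repo.get_failed_delivery_logs(hours_back=6, provider="smtp")
            stats = await repo.get_delivery_performance_stats(hours_back=24)
            print(len(failures), stats["success_rate_percent"])

    asyncio.run(demo())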