| """ |
| Health check and system monitoring endpoints |
| """ |
import os
import sys
import time
from typing import Dict, Any
from urllib.parse import urlsplit, urlunsplit

import psutil
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from sqlmodel import Session, text

from src.core.database import get_session
from src.core.logging import structured_logger

# NOTE(review): `settings` is read by health_check() and get_configuration()
# but no import for it is visible in this file. Add the project's settings
# import here (module path needs confirming); until then both endpoints fail
# with NameError.
|
|
| router = APIRouter() |
|
|
class SystemMonitor:
    """Collect point-in-time health data for the host, database and cache.

    Every public probe is a static method that returns a plain dict and does
    not raise: failures are reported inside the returned dict so that one
    broken dependency cannot take the whole health endpoint down.
    """

    @staticmethod
    def get_system_metrics() -> Dict[str, Any]:
        """Return CPU, memory, disk, network and load-average readings.

        Returns:
            A dict with the keys ``timestamp``, ``cpu``, ``memory``, ``disk``,
            ``network`` and ``load``, or ``{"error": <message>}`` if any
            reading fails.
        """
        try:
            # Sample every psutil source exactly once so that all fields of a
            # section describe the same snapshot.
            cpu_freq = psutil.cpu_freq()
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage("/")
            network = psutil.net_io_counters()

            # os.getloadavg() does not exist on every platform and raises
            # OSError when the load average cannot be obtained.
            try:
                load_1m, load_5m, load_15m = os.getloadavg()
            except (AttributeError, OSError):
                load_1m = load_5m = load_15m = None

            metrics = {
                "timestamp": time.time(),
                "cpu": {
                    # A positive interval makes psutil block for that long
                    # (~100 ms here) while it measures CPU usage.
                    "percent": psutil.cpu_percent(interval=0.1),
                    "count": psutil.cpu_count(),
                    "freq": cpu_freq.current if cpu_freq else None,
                },
                "memory": {
                    "percent": memory.percent,
                    "available": memory.available,
                    "used": memory.used,
                    "total": memory.total,
                },
                "disk": {
                    "percent": disk.percent,
                    "free": disk.free,
                    "used": disk.used,
                    "total": disk.total,
                },
                # psutil returns None here on hosts without any network
                # interface.
                "network": {
                    "bytes_sent": network.bytes_sent if network else None,
                    "bytes_recv": network.bytes_recv if network else None,
                    "packets_sent": network.packets_sent if network else None,
                    "packets_recv": network.packets_recv if network else None,
                },
                "load": {
                    "1m": load_1m,
                    "5m": load_5m,
                    "15m": load_15m,
                },
            }

            structured_logger.log_metric(
                "system_metrics_collected", 1, {"source": "health_check"}
            )

            return metrics
        except Exception as e:
            structured_logger.log_error(e, None, "system_monitor")
            return {"error": str(e)}

    @staticmethod
    def _get_database_version(session: Session) -> str:
        """Return the database server version, or ``"unknown"``.

        The version is informational only, so a failure here must not turn an
        otherwise working connection into an "unhealthy" report.
        """
        try:
            # SQLite has no version() function; it exposes sqlite_version().
            dialect_name = session.get_bind().dialect.name
            if dialect_name == "sqlite":
                query = "SELECT sqlite_version()"
            else:
                query = "SELECT version()"
            version = session.execute(text(query)).scalar()
        except Exception as e:
            structured_logger.log_error(e, None, "database_monitor")
            # A failed statement can leave the transaction aborted; reset it
            # so the session stays usable for the caller.
            session.rollback()
            return "unknown"
        return str(version) if version is not None else "unknown"

    @staticmethod
    def get_database_status(session: Session) -> Dict[str, Any]:
        """Probe the database with ``SELECT 1`` and report the outcome.

        Args:
            session: Open database session to run the probe on.

        Returns:
            ``{"status": "healthy", "connection_time_ms": ..., "version": ...,
            "connection_pool": "active"}`` on success, or
            ``{"status": "unhealthy", "error": <message>}`` on failure.
        """
        try:
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed by wall-clock adjustments.
            started = time.perf_counter()

            # Textual SQL has to be wrapped in text(); a bare string is
            # rejected by SQLAlchemy 2.x.
            session.execute(text("SELECT 1"))
            session.commit()

            execution_time = (time.perf_counter() - started) * 1000

            return {
                "status": "healthy",
                "connection_time_ms": execution_time,
                "version": SystemMonitor._get_database_version(session),
                "connection_pool": "active",
            }
        except Exception as e:
            structured_logger.log_error(e, None, "database_monitor")
            return {
                "status": "unhealthy",
                "error": str(e),
            }

    @staticmethod
    def get_cache_status(
        host: str = "localhost",
        port: int = 6379,
        db: int = 0,
        timeout: float = 2.0,
    ) -> Dict[str, Any]:
        """Ping Redis and report basic server statistics.

        Args:
            host: Redis host name.
            port: Redis TCP port.
            db: Redis database index.
            timeout: Connect and read timeout in seconds, so that an
                unreachable Redis cannot stall the health check.

        Returns:
            ``{"status": "healthy", "redis_version": ..., "connected_clients":
            ..., "used_memory": ...}`` on success, or
            ``{"status": "unhealthy", "error": <message>}`` on failure
            (including when the ``redis`` package is not installed).
        """
        client = None
        try:
            import redis

            client = redis.Redis(
                host=host,
                port=port,
                db=db,
                socket_connect_timeout=timeout,
                socket_timeout=timeout,
            )
            client.ping()

            # One INFO call returns the server, clients and memory sections
            # together, replacing three separate round-trips.
            info = client.info()

            return {
                "status": "healthy",
                "redis_version": info.get("redis_version", "unknown"),
                "connected_clients": info.get("connected_clients", 0),
                "used_memory": info.get("used_memory_human", "unknown"),
            }
        except Exception as e:
            structured_logger.log_error(e, None, "cache_monitor")
            return {
                "status": "unhealthy",
                "error": str(e),
            }
        finally:
            # A new client (and pool) is created per call, so its sockets
            # must be released explicitly.
            if client is not None:
                client.connection_pool.disconnect()
|
|
@router.get("/health")
async def health_check() -> Dict[str, Any]:
    """Comprehensive health check endpoint.

    Runs the system, database and cache probes and derives one overall
    status from them:

    * ``"unhealthy"`` - the database probe failed; answered with HTTP 503.
    * ``"degraded"``  - the cache probe or system metrics failed; HTTP 200.
    * ``"healthy"``   - every probe succeeded; HTTP 200.

    Returns:
        A dict with ``status``, ``timestamp``, ``response_time_ms``,
        ``system``, ``database``, ``cache`` and ``application``. When the
        status is "unhealthy" the same payload is sent as a 503 JSONResponse.

    Raises:
        HTTPException: 503 if the check itself fails unexpectedly.
    """
    start_time = time.time()
    session = None

    try:
        # NOTE(review): if get_session is a generator-style dependency this
        # call yields a generator rather than a Session - confirm, and switch
        # to Depends(get_session) if so.
        session = get_session()

        system_metrics = SystemMonitor.get_system_metrics()
        database_status = SystemMonitor.get_database_status(session)
        cache_status = SystemMonitor.get_cache_status()

        # NOTE(review): `settings` has no visible import in this file; until
        # one is added this lookup raises NameError and the endpoint
        # answers 503.
        app_metrics = {
            # Seconds since this process started, not since this request.
            "uptime": time.time() - psutil.Process().create_time(),
            "version": "1.0.0",
            "environment": "production" if not settings.DEBUG else "development",
            "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
        }

        response_time = (time.time() - start_time) * 1000

        # The probes report failures in their result instead of raising, so
        # the overall status has to be derived from what they returned.
        if database_status.get("status") != "healthy":
            overall_status = "unhealthy"
        elif cache_status.get("status") != "healthy" or "error" in system_metrics:
            overall_status = "degraded"
        else:
            overall_status = "healthy"

        structured_logger.log_health_check(overall_status, response_time)

        payload = {
            "status": overall_status,
            "timestamp": time.time(),
            "response_time_ms": response_time,
            "system": system_metrics,
            "database": database_status,
            "cache": cache_status,
            "application": app_metrics,
        }

        if overall_status == "unhealthy":
            # Keep the usual payload shape but signal the failure to
            # monitors through the HTTP status code.
            return JSONResponse(status_code=503, content=payload)
        return payload

    except Exception as e:
        structured_logger.log_error(e, None, "health_check")
        structured_logger.log_health_check("unhealthy", (time.time() - start_time) * 1000)

        raise HTTPException(
            status_code=503,
            detail=f"Health check failed: {str(e)}"
        ) from e
    finally:
        # Release the session on every path. Guarded because the concrete
        # type returned by get_session is not visible in this file.
        release = getattr(session, "close", None)
        if callable(release):
            release()
|
|
@router.get("/metrics")
async def get_metrics() -> Dict[str, Any]:
    """Get application metrics.

    Combines the system, database and cache probes with usage statistics and
    database performance statistics.

    Returns:
        A dict with ``timestamp``, ``system``, ``database``, ``cache``,
        ``usage`` and ``database_performance``.

    Raises:
        HTTPException: 500 if any part of the collection fails.
    """
    session = None
    try:
        # NOTE(review): if get_session is a generator-style dependency this
        # call yields a generator rather than a Session - confirm, and switch
        # to Depends(get_session) if so.
        session = get_session()

        system_metrics = SystemMonitor.get_system_metrics()
        database_status = SystemMonitor.get_database_status(session)
        cache_status = SystemMonitor.get_cache_status()

        # Imported at call time, as in the original code, rather than at
        # module import time.
        from src.core.rate_limiting import usage_tracker
        usage_stats = usage_tracker.get_usage_statistics()

        from src.core.database_monitoring import database_performance_monitor
        db_stats = {
            "query_stats": database_performance_monitor.get_query_statistics(),
            "slow_queries": database_performance_monitor.get_slow_queries(),
        }

        return {
            "timestamp": time.time(),
            "system": system_metrics,
            "database": database_status,
            "cache": cache_status,
            "usage": usage_stats,
            "database_performance": db_stats,
        }

    except Exception as e:
        structured_logger.log_error(e, None, "metrics")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get metrics: {str(e)}"
        ) from e
    finally:
        # Release the session on every path. Guarded because the concrete
        # type returned by get_session is not visible in this file.
        release = getattr(session, "close", None)
        if callable(release):
            release()
|
|
@router.get("/status")
async def simple_status() -> Dict[str, Any]:
    """Simple status endpoint for load balancers.

    Runs a single ``SELECT 1`` against the database.

    Returns:
        ``{"status": "ok", "timestamp": ..., "version": "1.0.0"}``.

    Raises:
        HTTPException: 503 with a generic message if the probe fails; the
            underlying error is logged but not sent to the client.
    """
    session = None
    try:
        # NOTE(review): if get_session is a generator-style dependency this
        # call yields a generator rather than a Session - confirm, and switch
        # to Depends(get_session) if so.
        session = get_session()

        # Textual SQL has to be wrapped in text(); a bare string is rejected
        # by SQLAlchemy 2.x.
        session.execute(text("SELECT 1"))
        session.commit()

        return {
            "status": "ok",
            "timestamp": time.time(),
            "version": "1.0.0",
        }
    except Exception as e:
        structured_logger.log_error(e, None, "status")
        raise HTTPException(
            status_code=503,
            detail="Service unavailable"
        ) from e
    finally:
        # Release the session on every path. Guarded because the concrete
        # type returned by get_session is not visible in this file.
        release = getattr(session, "close", None)
        if callable(release):
            release()
|
|
def _mask_database_url(url: Any) -> str:
    """Return ``url`` with any password in its userinfo replaced by ``***``."""
    raw = str(url)
    parts = urlsplit(raw)
    if parts.password is None:
        # Nothing secret to hide. Return the input untouched, because
        # re-assembling a URL with an empty authority (e.g. sqlite:///...)
        # would not round-trip.
        return raw
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.split(":", 1)[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


@router.get("/config")
async def get_configuration() -> Dict[str, Any]:
    """Get application configuration (sanitized).

    Returns:
        A dict with ``environment``, ``database_url``, ``debug`` and
        ``version``. Outside debug mode the database URL is returned with its
        password masked, so credentials are never exposed by this endpoint.

    Raises:
        HTTPException: 500 if the configuration cannot be read.
    """
    try:
        # NOTE(review): `settings` has no visible import in this file; until
        # one is added this lookup raises NameError and the endpoint
        # answers 500.
        if settings.DEBUG:
            database_url = "sqlite:///./todo_app.db"
        else:
            database_url = _mask_database_url(settings.DATABASE_URL)

        return {
            "environment": "production" if not settings.DEBUG else "development",
            "database_url": database_url,
            "debug": settings.DEBUG,
            "version": "1.0.0",
        }
    except Exception as e:
        structured_logger.log_error(e, None, "config")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get configuration: {str(e)}"
        ) from e