e1250 commited on
Commit
efc36ce
·
1 Parent(s): 3d237cb
api/dependencies.py CHANGED
@@ -1,14 +1,18 @@
1
  # Here exists function to use instead of using app.state directly in the main.py
2
  from fastapi.requests import HTTPConnection
3
 
 
4
  def get_detection_model(request: HTTPConnection):
5
  return request.app.state.detection_model
6
 
 
7
  def get_depth_model(request: HTTPConnection):
8
  return request.app.state.depth_model
9
 
 
10
  def get_safety_detection_model(request: HTTPConnection):
11
  return request.app.state.safety_detection_model
12
 
 
13
  def get_redis(request: HTTPConnection):
14
- return request.app.state.redis
 
1
  # Here exists function to use instead of using app.state directly in the main.py
2
  from fastapi.requests import HTTPConnection
3
 
4
+
5
def get_detection_model(request: HTTPConnection):
    """Resolve the object-detection model from the application's shared state.

    Used as a FastAPI dependency (via Depends) instead of reaching into
    app.state directly inside route handlers.
    """
    app_state = request.app.state
    return app_state.detection_model
7
 
8
+
9
def get_depth_model(request: HTTPConnection):
    """Resolve the depth-estimation model from the application's shared state.

    FastAPI dependency; keeps route handlers decoupled from app.state.
    """
    app_state = request.app.state
    return app_state.depth_model
11
 
12
+
13
def get_safety_detection_model(request: HTTPConnection):
    """Resolve the safety-detection model from the application's shared state.

    FastAPI dependency; keeps route handlers decoupled from app.state.
    """
    app_state = request.app.state
    return app_state.safety_detection_model
15
 
16
+
17
def get_redis(request: HTTPConnection):
    """Resolve the shared Redis client from the application's state.

    FastAPI dependency; keeps route handlers decoupled from app.state.
    """
    app_state = request.app.state
    return app_state.redis
api/routers/camera_stream.py CHANGED
@@ -1,13 +1,17 @@
 
1
  from api.dependencies import get_safety_detection_model
2
  from api.dependencies import get_detection_model, get_depth_model
3
  import asyncio
4
  import itertools
5
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
6
- from pandas.core.frame import nested_data_to_arrays
7
- from ai.contracts.detector import DetectionResults
8
- from api.routers.metrics import active_cameras, decode_duration_seconds, depth_duration_seconds, detection_duration_seconds, frame_processing_duration_seconds
 
 
 
 
9
  from contracts.camera_metadata import CameraMetadata, DetectionMetadata
10
- import traceback
11
  import mlflow
12
  from utils.experiment import log_config
13
 
@@ -17,20 +21,21 @@ import time
17
 
18
  router = APIRouter()
19
 
 
20
  @router.websocket("/stream/{camera_id}")
21
  async def websocket_detect(
22
- websocket: WebSocket,
23
- camera_id:str,
24
  detector=Depends(get_detection_model),
25
  safety_detector=Depends(get_safety_detection_model),
26
- depth_model=Depends(get_depth_model)
27
- ):
28
  """
29
- WebSocket stream takes the frame pass it to the ai models, save it under the camera id provided in the url.
30
-
31
  url here is: ws://127.0.0.1:8000/detectors/stream/camera_id
32
  """
33
- # Yes, I asked the same questions, is using webscoket.app.state many times here is consuming. after checking, it is not performance consuming.
34
  state = websocket.app.state
35
  logger = state.logger
36
  # Using Depends is important and called Inversion Of Control (IoC)/ Dependency injection, and is important for testing.
@@ -39,14 +44,16 @@ async def websocket_detect(
39
  # Accepting the connection from the client
40
  await websocket.accept()
41
 
42
- # Logging and tracking action
43
  active_cameras.inc()
44
- await redis.sadd("cameras:active", camera_id) # Save connected camera name into redis
 
 
45
  logger.info(f"Client ID >>{camera_id}<< Connected...")
46
 
47
  step_counter = itertools.count()
48
 
49
- loop = asyncio.get_running_loop()
50
  # Queue removing old images in case they were being stacked
51
  frame_queue: asyncio.Queue = asyncio.Queue(maxsize=1)
52
 
@@ -62,83 +69,119 @@ async def websocket_detect(
62
  logger.debug("Frame Dropped", camera_id=camera_id)
63
  except asyncio.QueueEmpty:
64
  pass
65
-
66
  await frame_queue.put(frame_bytes)
67
  except WebSocketDisconnect:
68
  raise
69
-
70
  async def process_frames():
71
  try:
72
-
73
  logger.info(f"Camera {camera_id} start sending frames...")
74
 
75
- def decode_frame(fb): return cv.imdecode(np.frombuffer(fb, np.uint8), cv.IMREAD_COLOR)
 
76
 
77
- # Keep receiving messages in a loop until disconnection.
78
  while True:
79
  frame_bytes = await frame_queue.get()
80
-
81
  # Profiling
82
- t0 = time.time()
83
- image_array = await loop.run_in_executor(None, decode_frame, frame_bytes)
84
- decode_duration_seconds.labels(camera_id).observe(round(time.time() - t0, 3))
85
- mlflow.log_metric("frame_processing_time", round(time.time() - t0, 3), next(step_counter))
 
 
 
 
 
 
 
 
86
 
87
  # Apply detection models
88
  t0 = time.time()
89
- detection_task = loop.run_in_executor(None, detector.detect, image_array)
90
- safety_task = loop.run_in_executor(None, safety_detector.detect, image_array)
91
- detections, safety_detection = await asyncio.gather(detection_task, safety_task)
92
- detection_duration_seconds.labels(camera_id).observe(round(time.time() - t0, 3))
93
- mlflow.log_metric("detection_duration_seconds", round(time.time() - t0, 3), next(step_counter))
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  # Profiling
96
- frame_processing_duration_seconds.labels(camera_id).observe(round(time.time() - t0, 3))
 
 
97
  logger.debug("Frame processed", camera_id=camera_id)
98
- mlflow.log_metric("frame_processing duration time", round(time.time() - t0, 3), next(step_counter))
99
-
100
- boxes_center = []
101
- boxes_center_ratio = []
102
- for box in detections.detections:
103
- print(type(box))
104
- xmin, ymin, xmax, ymax = box.xyxy
105
- xcenter = (xmax + xmin) / 2
106
- ycenter = (ymax + ymin) / 2
107
- boxes_center.append((int(xcenter), int(ycenter)))
108
- boxes_center_ratio.append(xcenter / image_array.shape[1])
109
-
110
  t0 = time.time()
111
- depth_points = await loop.run_in_executor(None, depth_model.calculate_depth, image_array, boxes_center) if boxes_center else []
112
- depth_duration_seconds.labels(camera_id).observe(round(time.time() - t0, 3))
113
- mlflow.log_metric("depth_duration_seconds", round(time.time() - t0, 3), next(step_counter))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- detection_metadata = [DetectionMetadata(depth=depth, xRatio=xRatio) for depth, xRatio in zip(depth_points, boxes_center_ratio)]
116
- metadata = CameraMetadata(camera_id=camera_id, is_danger = True if safety_detection else False, detection_metadata=detection_metadata)
117
-
118
  await redis.publish("dashboard_stream", metadata.model_dump_json())
119
  # Even if the camera was disconnected, redis is still going to show its data, which is not accurate.
120
  # Instead, we set expiry date for the camera data.
121
  await redis.setex(
122
- f"camera:{camera_id}:latest", # And this is the key, or tag
123
- 10, # in seconds
124
- metadata.model_dump_json()
125
  )
126
 
127
  # Note that JSONResponse doesn't work here, as it is for HTTP
128
  await websocket.send_json({"status": 200, "camera_id": camera_id})
129
-
130
  except Exception as e:
131
  logger.error(f"Processing Error: {e}", camera_id=camera_id)
132
  raise
133
-
134
- with mlflow.start_run(run_name=f'camera_{camera_id}', nested=True, parent_run_id=state.mlflow_run_id):
 
 
135
  log_config()
136
 
137
  try:
138
- await asyncio.gather(
139
- receive_frames(),
140
- process_frames()
141
- )
142
 
143
  except WebSocketDisconnect:
144
  logger.warn(f"Client ID >>{camera_id}<< Disconnected Normally...")
@@ -146,10 +189,12 @@ async def websocket_detect(
146
  except Exception as e:
147
  logger.error(f"Error in websocker, Client ID: >>{camera_id}<<: {e}")
148
  logger.exception(e)
149
- # This one is actually really better, it shows more details about the issue happened.
150
  # Also work on and create the logger.exception, as it directly controls printing more details about the issue happened.
151
  await websocket.close()
152
 
153
  finally:
154
- await redis.srem("cameras:active", camera_id) # Remove the camera from redis connected cameras
155
- active_cameras.dec()
 
 
 
1
+ from backend.domain.detection_box_center import calculate_detection_box_center
2
  from api.dependencies import get_safety_detection_model
3
  from api.dependencies import get_detection_model, get_depth_model
4
  import asyncio
5
  import itertools
6
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
7
+ from api.routers.metrics import (
8
+ active_cameras,
9
+ decode_duration_seconds,
10
+ depth_duration_seconds,
11
+ detection_duration_seconds,
12
+ frame_processing_duration_seconds,
13
+ )
14
  from contracts.camera_metadata import CameraMetadata, DetectionMetadata
 
15
  import mlflow
16
  from utils.experiment import log_config
17
 
 
21
 
22
  router = APIRouter()
23
 
24
+
25
  @router.websocket("/stream/{camera_id}")
26
  async def websocket_detect(
27
+ websocket: WebSocket,
28
+ camera_id: str,
29
  detector=Depends(get_detection_model),
30
  safety_detector=Depends(get_safety_detection_model),
31
+ depth_model=Depends(get_depth_model),
32
+ ):
33
  """
34
+ WebSocket stream takes the frame pass it to the ai models, save it under the camera id provided in the url.
35
+
36
  url here is: ws://127.0.0.1:8000/detectors/stream/camera_id
37
  """
38
+ # Yes, I asked the same question: is using websocket.app.state many times here costly? After checking, it is not a performance concern.
39
  state = websocket.app.state
40
  logger = state.logger
41
  # Using Depends is important and called Inversion Of Control (IoC)/ Dependency injection, and is important for testing.
 
44
  # Accepting the connection from the client
45
  await websocket.accept()
46
 
47
+ # Logging and tracking action
48
  active_cameras.inc()
49
+ await redis.sadd(
50
+ "cameras:active", camera_id
51
+ ) # Save connected camera name into redis
52
  logger.info(f"Client ID >>{camera_id}<< Connected...")
53
 
54
  step_counter = itertools.count()
55
 
56
+ loop = asyncio.get_running_loop()
57
  # Queue removing old images in case they were being stacked
58
  frame_queue: asyncio.Queue = asyncio.Queue(maxsize=1)
59
 
 
69
  logger.debug("Frame Dropped", camera_id=camera_id)
70
  except asyncio.QueueEmpty:
71
  pass
72
+
73
  await frame_queue.put(frame_bytes)
74
  except WebSocketDisconnect:
75
  raise
76
+
77
  async def process_frames():
78
  try:
 
79
  logger.info(f"Camera {camera_id} start sending frames...")
80
 
81
+ def decode_frame(fb):
82
+ return cv.imdecode(np.frombuffer(fb, np.uint8), cv.IMREAD_COLOR)
83
 
84
+ # Keep receiving messages in a loop until disconnection.
85
  while True:
86
  frame_bytes = await frame_queue.get()
87
+
88
  # Profiling
89
+ t0 = time.time()
90
+ image_array = await loop.run_in_executor(
91
+ None, decode_frame, frame_bytes
92
+ )
93
+ decode_duration_seconds.labels(camera_id).observe(
94
+ round(time.time() - t0, 3)
95
+ )
96
+ mlflow.log_metric(
97
+ "frame_processing_time",
98
+ round(time.time() - t0, 3),
99
+ next(step_counter),
100
+ )
101
 
102
  # Apply detection models
103
  t0 = time.time()
104
+ detection_task = loop.run_in_executor(
105
+ None, detector.detect, image_array
106
+ )
107
+ safety_task = loop.run_in_executor(
108
+ None, safety_detector.detect, image_array
109
+ )
110
+ detections, safety_detection = await asyncio.gather(
111
+ detection_task, safety_task
112
+ )
113
+ detection_duration_seconds.labels(camera_id).observe(
114
+ round(time.time() - t0, 3)
115
+ )
116
+ mlflow.log_metric(
117
+ "detection_duration_seconds",
118
+ round(time.time() - t0, 3),
119
+ next(step_counter),
120
+ )
121
 
122
  # Profiling
123
+ frame_processing_duration_seconds.labels(camera_id).observe(
124
+ round(time.time() - t0, 3)
125
+ )
126
  logger.debug("Frame processed", camera_id=camera_id)
127
+ mlflow.log_metric(
128
+ "frame_processing duration time",
129
+ round(time.time() - t0, 3),
130
+ next(step_counter),
131
+ )
132
+
133
+ boxes_center, boxes_center_ratio = calculate_detection_box_center(detections.detections, image_array.shape[1])
134
+
 
 
 
 
135
  t0 = time.time()
136
+ depth_points = (
137
+ await loop.run_in_executor(
138
+ None, depth_model.calculate_depth, image_array, boxes_center
139
+ )
140
+ if boxes_center
141
+ else []
142
+ )
143
+ depth_duration_seconds.labels(camera_id).observe(
144
+ round(time.time() - t0, 3)
145
+ )
146
+ mlflow.log_metric(
147
+ "depth_duration_seconds",
148
+ round(time.time() - t0, 3),
149
+ next(step_counter),
150
+ )
151
+
152
+ detection_metadata = [
153
+ DetectionMetadata(depth=depth, xRatio=xRatio)
154
+ for depth, xRatio in zip(depth_points, boxes_center_ratio)
155
+ ]
156
+ metadata = CameraMetadata(
157
+ camera_id=camera_id,
158
+ is_danger=True if safety_detection else False,
159
+ detection_metadata=detection_metadata,
160
+ )
161
 
 
 
 
162
  await redis.publish("dashboard_stream", metadata.model_dump_json())
163
  # Even if the camera was disconnected, redis is still going to show its data, which is not accurate.
164
  # Instead, we set expiry date for the camera data.
165
  await redis.setex(
166
+ f"camera:{camera_id}:latest", # And this is the key, or tag
167
+ 10, # in seconds
168
+ metadata.model_dump_json(),
169
  )
170
 
171
  # Note that JSONResponse doesn't work here, as it is for HTTP
172
  await websocket.send_json({"status": 200, "camera_id": camera_id})
173
+
174
  except Exception as e:
175
  logger.error(f"Processing Error: {e}", camera_id=camera_id)
176
  raise
177
+
178
+ with mlflow.start_run(
179
+ run_name=f"camera_{camera_id}", nested=True, parent_run_id=state.mlflow_run_id
180
+ ):
181
  log_config()
182
 
183
  try:
184
+ await asyncio.gather(receive_frames(), process_frames())
 
 
 
185
 
186
  except WebSocketDisconnect:
187
  logger.warn(f"Client ID >>{camera_id}<< Disconnected Normally...")
 
189
  except Exception as e:
190
  logger.error(f"Error in websocker, Client ID: >>{camera_id}<<: {e}")
191
  logger.exception(e)
192
+ # This one is actually really better, it shows more details about the issue happened.
193
  # Also work on and create the logger.exception, as it directly controls printing more details about the issue happened.
194
  await websocket.close()
195
 
196
  finally:
197
+ await redis.srem(
198
+ "cameras:active", camera_id
199
+ ) # Remove the camera from redis connected cameras
200
+ active_cameras.dec()
api/routers/dashboard_stream.py CHANGED
@@ -1,15 +1,14 @@
1
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect
2
- from api.routers.metrics import active_dashboards
3
  import asyncio
4
- import traceback
5
- import redis.asyncio as aioredis
6
 
7
  router = APIRouter()
8
 
 
9
  @router.websocket("/stream")
10
  async def dashboard_websocket(websocket: WebSocket):
11
  """
12
- WebScoket sending updates to the dashboard.
13
 
14
  url: ws://127.0.0.1:8000/dashboard/stream
15
  """
@@ -17,10 +16,10 @@ async def dashboard_websocket(websocket: WebSocket):
17
  logger = state.logger
18
  redis = state.redis
19
 
20
- # Accept the client connection.
21
  await websocket.accept()
22
 
23
- # Logging and tracking
24
  active_dashboards.inc()
25
  logger.info("Dashboard Connected...")
26
 
@@ -28,7 +27,6 @@ async def dashboard_websocket(websocket: WebSocket):
28
  await pubsub.subscribe("dashboard_stream")
29
 
30
  try:
31
-
32
  while True:
33
  message = await pubsub.get_message(ignore_subscribe_messages=True)
34
 
@@ -36,8 +34,7 @@ async def dashboard_websocket(websocket: WebSocket):
36
  logger.debug("Sending updates to Dashboard...")
37
  await websocket.send_text(message["data"])
38
 
39
- await asyncio.sleep(0.01) # giving time to detect server disconnection.
40
-
41
 
42
  except WebSocketDisconnect:
43
  logger.warn("Dashboard Disconnected Normally...")
@@ -49,4 +46,4 @@ async def dashboard_websocket(websocket: WebSocket):
49
  finally:
50
  active_dashboards.dec()
51
  await pubsub.unsubscribe("dashboard_stream")
52
- await pubsub.close()
 
1
  from fastapi import APIRouter, WebSocket, WebSocketDisconnect
2
+ from api.routers.metrics import active_dashboards
3
  import asyncio
 
 
4
 
5
  router = APIRouter()
6
 
7
+
8
  @router.websocket("/stream")
9
  async def dashboard_websocket(websocket: WebSocket):
10
  """
11
+ WebScoket sending updates to the dashboard.
12
 
13
  url: ws://127.0.0.1:8000/dashboard/stream
14
  """
 
16
  logger = state.logger
17
  redis = state.redis
18
 
19
+ # Accept the client connection.
20
  await websocket.accept()
21
 
22
+ # Logging and tracking
23
  active_dashboards.inc()
24
  logger.info("Dashboard Connected...")
25
 
 
27
  await pubsub.subscribe("dashboard_stream")
28
 
29
  try:
 
30
  while True:
31
  message = await pubsub.get_message(ignore_subscribe_messages=True)
32
 
 
34
  logger.debug("Sending updates to Dashboard...")
35
  await websocket.send_text(message["data"])
36
 
37
+ await asyncio.sleep(0.01) # giving time to detect server disconnection.
 
38
 
39
  except WebSocketDisconnect:
40
  logger.warn("Dashboard Disconnected Normally...")
 
46
  finally:
47
  active_dashboards.dec()
48
  await pubsub.unsubscribe("dashboard_stream")
49
+ await pubsub.close()
api/routers/health.py CHANGED
@@ -1,6 +1,6 @@
1
- # Very simple and important file, uesd to check the api health, if it return 200 everything is great, otherwise, there is an issue.
2
- # This file is being used mostly in HTTP and not websockets.
3
- # Health check is being used for example by docker, to check is dependencies are working fine, if not, he might restart.
4
 
5
  from requests import Request
6
  from http import HTTPStatus
@@ -11,12 +11,13 @@ from api.routers.metrics import active_cameras
11
 
12
  router = APIRouter()
13
 
 
14
  @router.get("/")
15
  @router.get("/live")
16
  async def live_check(response: Response):
17
  """
18
- Prove that the process is running, No logic requried here.
19
- Confirming that the server is not dead.
20
  It is fails, container killed and restarted..
21
  Has to be very cheap.
22
  """
@@ -25,14 +26,15 @@ async def live_check(response: Response):
25
  return {
26
  "status": "live",
27
  "active_cameras": active_cameras._value.get(),
28
- "timestamp": datetime.now().isoformat()
29
- }
 
30
 
31
  @router.get("/ready")
32
  async def ready_check(response: Response, request: Request):
33
  """
34
- Checck if parts work here, ex. are data readable.
35
- Are data readable here.
36
  Also can this instance accept traffic right now, or send them to another healthy instance.
37
  """
38
 
@@ -43,7 +45,7 @@ async def ready_check(response: Response, request: Request):
43
  try:
44
  await request.app.state.redis.ping()
45
  checks["redis"] = "Good"
46
- except Exception as e:
47
  checks["redis"] = "unreachable"
48
  healthy = False
49
 
@@ -55,13 +57,15 @@ async def ready_check(response: Response, request: Request):
55
  checks["detection_model"] = "can't load"
56
  healthy = False
57
 
58
- checks["active_cameras"] = list(await request.app.state.redis.smembers("cameras:active"))
 
 
59
 
60
  response.status_code = HTTPStatus.OK if healthy else HTTPStatus.SERVICE_UNAVAILABLE
61
 
62
  return {
63
  "status": "ready" if healthy else "degraded",
64
  "checks": checks,
65
- "timestamp": datetime.now().isoformat(), # Sending the time also is a good practise
66
  "version": "1.0.0",
67
- }
 
1
+ # Very simple and important file, used to check the API health; if it returns 200 everything is fine, otherwise there is an issue.
2
+ # This file is being used mostly in HTTP and not websockets.
3
+ # Health check is used, for example, by Docker to check if dependencies are working fine; if not, it may restart the container.
4
 
5
  from requests import Request
6
  from http import HTTPStatus
 
11
 
12
  router = APIRouter()
13
 
14
+
15
  @router.get("/")
16
  @router.get("/live")
17
  async def live_check(response: Response):
18
  """
19
+ Prove that the process is running; no logic required here.
20
+ Confirming that the server is not dead.
21
  It is fails, container killed and restarted..
22
  Has to be very cheap.
23
  """
 
26
  return {
27
  "status": "live",
28
  "active_cameras": active_cameras._value.get(),
29
+ "timestamp": datetime.now().isoformat(),
30
+ }
31
+
32
 
33
  @router.get("/ready")
34
  async def ready_check(response: Response, request: Request):
35
  """
36
+ Check if parts work here, e.g. is data readable.
37
+ Are data readable here.
38
  Also can this instance accept traffic right now, or send them to another healthy instance.
39
  """
40
 
 
45
  try:
46
  await request.app.state.redis.ping()
47
  checks["redis"] = "Good"
48
+ except Exception:
49
  checks["redis"] = "unreachable"
50
  healthy = False
51
 
 
57
  checks["detection_model"] = "can't load"
58
  healthy = False
59
 
60
+ checks["active_cameras"] = list(
61
+ await request.app.state.redis.smembers("cameras:active")
62
+ )
63
 
64
  response.status_code = HTTPStatus.OK if healthy else HTTPStatus.SERVICE_UNAVAILABLE
65
 
66
  return {
67
  "status": "ready" if healthy else "degraded",
68
  "checks": checks,
69
+ "timestamp": datetime.now().isoformat(), # Sending the time also is a good practise
70
  "version": "1.0.0",
71
+ }
api/routers/metrics.py CHANGED
@@ -1,45 +1,35 @@
1
- # Prometheus is for real-time system health.
2
  # Grafana visualize the output of Prometheus
3
  # This is considered as Monitoring
4
- from prometheus_client import Counter, Histogram, Gauge, make_asgi_app
5
 
6
  metrics_asgi_app = make_asgi_app()
7
 
8
 
9
  active_cameras = Gauge(
10
- "active_camera_connections",
11
- "Number of Currently Connected camera websockets"
12
  )
13
 
14
  active_dashboards = Gauge(
15
- "active_dashboards",
16
- "Number of active dashboards which fetching data"
17
  )
18
 
19
  frame_processing_duration_seconds = Histogram(
20
- "frame_processing_duration_seconds",
21
- "Time to process one frame",
22
- ["camera_id"]
23
  )
24
 
25
  decode_duration_seconds = Histogram(
26
- "decode_duration_seconds",
27
- "Time to decode one image",
28
- ["camera_id"]
29
  )
30
  detection_duration_seconds = Histogram(
31
- "detection_duration_seconds",
32
- "Time to detect",
33
- ["camera_id"]
34
  )
35
  depth_duration_seconds = Histogram(
36
- "depth_duration_seconds",
37
- "Time to calculate the depth",
38
- ["camera_id"]
39
  )
40
 
41
 
42
  cpu_usage = Gauge("cpu_usage_percent", "CPU usage %")
43
  mem_usage = Gauge("mem_usage_percent", "mem usage %")
44
 
45
- active_workers = Gauge("active_workers", "Active threads")
 
1
+ # Prometheus is for real-time system health.
2
  # Grafana visualize the output of Prometheus
3
  # This is considered as Monitoring
4
+ from prometheus_client import Histogram, Gauge, make_asgi_app
5
 
6
  metrics_asgi_app = make_asgi_app()
7
 
8
 
9
  active_cameras = Gauge(
10
+ "active_camera_connections", "Number of Currently Connected camera websockets"
 
11
  )
12
 
13
  active_dashboards = Gauge(
14
+ "active_dashboards", "Number of active dashboards which fetching data"
 
15
  )
16
 
17
  frame_processing_duration_seconds = Histogram(
18
+ "frame_processing_duration_seconds", "Time to process one frame", ["camera_id"]
 
 
19
  )
20
 
21
  decode_duration_seconds = Histogram(
22
+ "decode_duration_seconds", "Time to decode one image", ["camera_id"]
 
 
23
  )
24
  detection_duration_seconds = Histogram(
25
+ "detection_duration_seconds", "Time to detect", ["camera_id"]
 
 
26
  )
27
  depth_duration_seconds = Histogram(
28
+ "depth_duration_seconds", "Time to calculate the depth", ["camera_id"]
 
 
29
  )
30
 
31
 
32
  cpu_usage = Gauge("cpu_usage_percent", "CPU usage %")
33
  mem_usage = Gauge("mem_usage_percent", "mem usage %")
34
 
35
+ active_workers = Gauge("active_workers", "Active threads")
config/settings.py CHANGED
@@ -1,9 +1,17 @@
1
  from pathlib import Path
2
  from typing import Literal, List
3
  from pydantic import BaseModel
4
- from pydantic_settings import BaseSettings, DotEnvSettingsSource, EnvSettingsSource, SettingsConfigDict, PydanticBaseSettingsSource, YamlConfigSettingsSource
 
 
 
 
 
 
 
5
  import yaml
6
 
 
7
  def join_tag(loader, node):
8
  """
9
  Help joining pathes in config.YAML directly.
@@ -12,16 +20,20 @@ def join_tag(loader, node):
12
  path = Path(*(str(part) for part in parts)).resolve()
13
  return str(path)
14
 
15
- # It didn't work before, After some research, .SafeLoaded is unmentioned must for my case.
 
16
  yaml.SafeLoader.add_constructor("!join", join_tag)
17
 
 
18
  class IntervalsConfig(BaseModel):
19
  system_metrics_seconds: float
20
  frames_summary_every: int
21
  realtime_updates_every: float
22
 
 
23
  class YoloConfig(BaseModel):
24
  """Contains yolo configurations"""
 
25
  model_name: str
26
  classes: List[str]
27
  batch_size: int
@@ -30,13 +42,17 @@ class YoloConfig(BaseModel):
30
  augment: bool
31
  data_path: str
32
 
 
33
  class SecurityDetector(BaseModel):
34
  "Contains Security Detectors like Smoke - Fire"
 
35
  model_name: str
36
  classes: List[str]
37
 
 
38
  class DepthConfig(BaseModel):
39
  "Contains depths estimation configurations"
 
40
  model_name: str
41
  device: Literal["cuda", "cpu"]
42
  encoder: Literal["vits", "vitb", "vitl", "vitg"]
@@ -49,45 +65,47 @@ class AppConfig(BaseSettings):
49
  - Override values with .env
50
  """
51
 
52
- # Note that it doesn't show error, Take care.
53
  model_config = SettingsConfigDict(
54
  env_file=Path(__file__).parent / ".env",
55
  env_file_encoding="utf-8",
56
  yaml_file=Path(__file__).parent / "config.yaml",
57
- extra="ignore" # Ignore other settings in yaml and env as they are not mentioedhere
58
  )
59
 
60
- project_name:str
61
- project_desc:str
62
  task: Literal["indoor", "outdoor"]
63
 
64
  yolo: YoloConfig
65
  security_detector: SecurityDetector
66
  depth: DepthConfig
67
  intervals: IntervalsConfig
68
- redis_url:str
69
 
70
  @classmethod
71
- def settings_customise_sources(cls,
 
72
  settings_cls: type[BaseSettings], # Base param.
73
- **kwargs
74
- ) -> tuple[PydanticBaseSettingsSource, ...] :
75
  """
76
  Once you use this, no need to use load_config, it is already the same.
77
- But this time it fixs the priority part, order by parameters priority.
78
  """
79
 
80
  # Order by priority (first, more important)
81
  return (
82
- DotEnvSettingsSource(settings_cls), # Most important
83
- EnvSettingsSource(settings_cls), # This allow for ex. hugging face to override .env values with its values.
84
- YamlConfigSettingsSource(settings_cls),
85
- ) # The return must be a tuple
 
 
86
 
87
 
88
- if __name__ == "__main__":
89
-
90
- # Trying to checking both yaml and .env. This works really fine now.
91
  config = AppConfig()
92
  print(config.model_dump())
93
- print(config.model_dump()["project_name"])
 
1
  from pathlib import Path
2
  from typing import Literal, List
3
  from pydantic import BaseModel
4
+ from pydantic_settings import (
5
+ BaseSettings,
6
+ DotEnvSettingsSource,
7
+ EnvSettingsSource,
8
+ SettingsConfigDict,
9
+ PydanticBaseSettingsSource,
10
+ YamlConfigSettingsSource,
11
+ )
12
  import yaml
13
 
14
+
15
  def join_tag(loader, node):
16
  """
17
  Help joining pathes in config.YAML directly.
 
20
  path = Path(*(str(part) for part in parts)).resolve()
21
  return str(path)
22
 
23
+
24
+ # It didn't work before, After some research, .SafeLoaded is unmentioned must for my case.
25
  yaml.SafeLoader.add_constructor("!join", join_tag)
26
 
27
+
28
  class IntervalsConfig(BaseModel):
29
  system_metrics_seconds: float
30
  frames_summary_every: int
31
  realtime_updates_every: float
32
 
33
+
34
  class YoloConfig(BaseModel):
35
  """Contains yolo configurations"""
36
+
37
  model_name: str
38
  classes: List[str]
39
  batch_size: int
 
42
  augment: bool
43
  data_path: str
44
 
45
+
46
  class SecurityDetector(BaseModel):
47
  "Contains Security Detectors like Smoke - Fire"
48
+
49
  model_name: str
50
  classes: List[str]
51
 
52
+
53
  class DepthConfig(BaseModel):
54
  "Contains depths estimation configurations"
55
+
56
  model_name: str
57
  device: Literal["cuda", "cpu"]
58
  encoder: Literal["vits", "vitb", "vitl", "vitg"]
 
65
  - Override values with .env
66
  """
67
 
68
+ # Note that it doesn't show error, Take care.
69
  model_config = SettingsConfigDict(
70
  env_file=Path(__file__).parent / ".env",
71
  env_file_encoding="utf-8",
72
  yaml_file=Path(__file__).parent / "config.yaml",
73
+ extra="ignore", # Ignore other settings in yaml and env as they are not mentioedhere
74
  )
75
 
76
+ project_name: str
77
+ project_desc: str
78
  task: Literal["indoor", "outdoor"]
79
 
80
  yolo: YoloConfig
81
  security_detector: SecurityDetector
82
  depth: DepthConfig
83
  intervals: IntervalsConfig
84
+ redis_url: str
85
 
86
  @classmethod
87
+ def settings_customise_sources(
88
+ cls,
89
  settings_cls: type[BaseSettings], # Base param.
90
+ **kwargs,
91
+ ) -> tuple[PydanticBaseSettingsSource, ...]:
92
  """
93
  Once you use this, no need to use load_config, it is already the same.
94
+ But this time it fixes the priority part, ordered by source priority.
95
  """
96
 
97
  # Order by priority (first, more important)
98
  return (
99
+ DotEnvSettingsSource(settings_cls), # Most important
100
+ EnvSettingsSource(
101
+ settings_cls
102
+ ), # This allow for ex. hugging face to override .env values with its values.
103
+ YamlConfigSettingsSource(settings_cls),
104
+ ) # The return must be a tuple
105
 
106
 
107
+ if __name__ == "__main__":
108
+ # Trying to checking both yaml and .env. This works really fine now.
 
109
  config = AppConfig()
110
  print(config.model_dump())
111
+ print(config.model_dump()["project_name"])
contracts/camera_metadata.py CHANGED
@@ -1,11 +1,13 @@
1
  from typing import List
2
  from pydantic import BaseModel
3
 
 
4
  class DetectionMetadata(BaseModel):
5
  depth: float
6
  xRatio: float
7
 
 
8
  class CameraMetadata(BaseModel):
9
  camera_id: str
10
  is_danger: bool = False
11
- detection_metadata: List[DetectionMetadata]
 
1
  from typing import List
2
  from pydantic import BaseModel
3
 
4
+
5
class DetectionMetadata(BaseModel):
    """Per-detection data published to the dashboard stream."""

    # Depth value computed for the detection's box center.
    depth: float
    # Horizontal box-center position expressed as a ratio of the image width.
    xRatio: float
8
 
9
+
10
class CameraMetadata(BaseModel):
    """Per-camera payload published to Redis for the dashboard."""

    camera_id: str
    # True when the safety detector returned any detections for the frame.
    is_danger: bool = False
    detection_metadata: List[DetectionMetadata]
domain/detection_box_center.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def calculate_detection_box_center(detections, image_width: float):
    """Compute pixel centers and horizontal center ratios for detection boxes.

    Args:
        detections: iterable of detection boxes, each exposing an ``xyxy``
            tuple ``(xmin, ymin, xmax, ymax)``. The caller passes the list of
            boxes directly (``detections.detections`` at the call site), so
            this function iterates its argument as-is. The previous version
            dereferenced ``.detections`` a second time, which would raise
            AttributeError on a plain list.
        image_width: width of the source image in pixels, used to express the
            x-center as a ratio in [0, 1].

    Returns:
        A tuple ``(boxes_center, boxes_center_ratio)`` where ``boxes_center``
        is a list of ``(int(x), int(y))`` pixel centers and
        ``boxes_center_ratio`` is a list of ``xcenter / image_width`` floats.
        Both lists are empty when ``detections`` is empty.
    """
    boxes_center = []
    boxes_center_ratio = []
    for box in detections:
        xmin, ymin, xmax, ymax = box.xyxy
        xcenter = (xmin + xmax) / 2
        ycenter = (ymin + ymax) / 2
        boxes_center.append((int(xcenter), int(ycenter)))
        boxes_center_ratio.append(xcenter / image_width)

    return (boxes_center, boxes_center_ratio)
domain/logger.py CHANGED
@@ -2,20 +2,20 @@ from abc import ABC, abstractmethod
2
 
3
  class Logger(ABC):
4
  @abstractmethod
5
- def info(self, msg:str, **kwargs):
6
  pass
7
 
8
- def debug(self, msg:str, **kwargs):
9
  pass
10
 
11
  @abstractmethod
12
- def error(self, msg:str, **kwargs):
13
  pass
14
 
15
  @abstractmethod
16
- def warn(self, msg:str, **kwargs):
17
  pass
18
 
19
  @abstractmethod
20
- def exception(self, msg:str, **kwargs):
21
  pass
 
2
 
3
class Logger(ABC):
    """Abstract logging interface (dependency-inversion port).

    Application code depends on this interface; a concrete adapter
    (e.g. the structlog-backed StructLogger) supplies the implementation.
    """

    @abstractmethod
    def info(self, msg: str, **kwargs):
        pass

    # NOTE(review): `debug` is the only method NOT marked @abstractmethod,
    # so subclasses silently inherit a no-op — confirm this is intentional.
    def debug(self, msg: str, **kwargs):
        pass

    @abstractmethod
    def error(self, msg: str, **kwargs):
        pass

    @abstractmethod
    def warn(self, msg: str, **kwargs):
        pass

    @abstractmethod
    def exception(self, msg: str, **kwargs):
        pass
infra/logger_structlog.py CHANGED
@@ -4,36 +4,42 @@ from domain.logger import Logger
4
  from pathlib import Path
5
  import logging
6
 
7
- # Don't forget to keep logs.json file meaningful.
 
8
  def setup_logging(logs_path: Path | str):
9
  # log_file = open(logs_path, "a", encoding="utf-8")
10
  structlog.configure(
11
- processors = [
12
  structlog.processors.StackInfoRenderer(), # Stack strace, showing the exact source of errors.
13
  structlog.processors.format_exc_info, # for Exceptions in JSON
14
  structlog.processors.add_log_level, # Adding log level (info, warning, error)
15
- structlog.processors.TimeStamper(fmt="iso", utc=True), # Adding ISO timestamp
 
 
16
  structlog.processors.JSONRenderer(), # Makes JSON outputs
17
  ],
18
- wrapper_class=structlog.make_filtering_bound_logger(logging.INFO), # Profiling info and higher.
 
 
19
  # logger_factory = structlog.WriteLoggerFactory(file=log_file), # Save in file instead of terminal
20
  cache_logger_on_first_use=True, # Caching being used for optimization
21
  )
22
 
 
23
  class StructLogger(Logger):
24
- def __init__(self, settings:AppConfig):
25
  setup_logging(logs_path="")
26
  self._logger = structlog.get_logger()
27
 
28
- def info(self, message:str, **kwargs):
29
  print(message)
30
  self._logger.info(message, **kwargs)
31
 
32
- def debug(self, message:str, **kwargs):
33
  print(message)
34
  self._logger.debug(message, **kwargs)
35
 
36
- def error(self, message:str, **kwargs):
37
  print(message)
38
  self._logger.error(message, **kwargs)
39
 
@@ -42,6 +48,6 @@ class StructLogger(Logger):
42
  print(message)
43
  self._logger.warn(message, **kwargs)
44
 
45
- def exception(self, message:str, **kwargs):
46
  print(message)
47
- self._logger.exception(message, **kwargs)
 
4
  from pathlib import Path
5
  import logging
6
 
7
+
8
+ # Don't forget to keep logs.json file meaningful.
9
  def setup_logging(logs_path: Path | str):
10
  # log_file = open(logs_path, "a", encoding="utf-8")
11
  structlog.configure(
12
+ processors=[
13
  structlog.processors.StackInfoRenderer(), # Stack strace, showing the exact source of errors.
14
  structlog.processors.format_exc_info, # for Exceptions in JSON
15
  structlog.processors.add_log_level, # Adding log level (info, warning, error)
16
+ structlog.processors.TimeStamper(
17
+ fmt="iso", utc=True
18
+ ), # Adding ISO timestamp
19
  structlog.processors.JSONRenderer(), # Makes JSON outputs
20
  ],
21
+ wrapper_class=structlog.make_filtering_bound_logger(
22
+ logging.INFO
23
+ ), # Profiling info and higher.
24
  # logger_factory = structlog.WriteLoggerFactory(file=log_file), # Save in file instead of terminal
25
  cache_logger_on_first_use=True, # Caching being used for optimization
26
  )
27
 
28
+
29
  class StructLogger(Logger):
30
+ def __init__(self, settings: AppConfig):
31
  setup_logging(logs_path="")
32
  self._logger = structlog.get_logger()
33
 
34
+ def info(self, message: str, **kwargs):
35
  print(message)
36
  self._logger.info(message, **kwargs)
37
 
38
+ def debug(self, message: str, **kwargs):
39
  print(message)
40
  self._logger.debug(message, **kwargs)
41
 
42
+ def error(self, message: str, **kwargs):
43
  print(message)
44
  self._logger.error(message, **kwargs)
45
 
 
48
  print(message)
49
  self._logger.warn(message, **kwargs)
50
 
51
+ def exception(self, message: str, **kwargs):
52
  print(message)
53
+ self._logger.exception(message, **kwargs)
infra/system_metrics.py CHANGED
@@ -1,19 +1,21 @@
1
  from domain.logger import Logger
2
  import psutil
3
  import asyncio
4
- from api.routers.metrics import active_workers, cpu_usage, mem_usage
5
 
6
- async def log_system_metrics(logger:Logger, logger_interval_sec:float):
 
7
  while True:
8
  cpu = psutil.cpu_percent(interval=1)
9
  mem = psutil.virtual_memory()
10
 
11
  # Structlog Logging
12
- logger.info("System Metrics",
 
13
  cpu_percent=cpu,
14
  memtory_percent=mem.percent,
15
  memory_used_gb=round(mem.used / (1024**3), 2),
16
- memory_total_gb=round(mem.total / (1024**3), 2)
17
  )
18
 
19
  # Prometheus
@@ -22,4 +24,4 @@ async def log_system_metrics(logger:Logger, logger_interval_sec:float):
22
 
23
  # active_workers.set()
24
 
25
- await asyncio.sleep(logger_interval_sec)
 
1
  from domain.logger import Logger
2
  import psutil
3
  import asyncio
4
+ from api.routers.metrics import cpu_usage, mem_usage
5
 
6
+
7
+ async def log_system_metrics(logger: Logger, logger_interval_sec: float):
8
  while True:
9
  cpu = psutil.cpu_percent(interval=1)
10
  mem = psutil.virtual_memory()
11
 
12
  # Structlog Logging
13
+ logger.info(
14
+ "System Metrics",
15
  cpu_percent=cpu,
16
  memtory_percent=mem.percent,
17
  memory_used_gb=round(mem.used / (1024**3), 2),
18
+ memory_total_gb=round(mem.total / (1024**3), 2),
19
  )
20
 
21
  # Prometheus
 
24
 
25
  # active_workers.set()
26
 
27
+ await asyncio.sleep(logger_interval_sec)
main.py CHANGED
@@ -3,7 +3,6 @@ from ai.depth.depth_anything import DepthAnything
3
  from ai.detectors.yolo_detector import YOLO_Detector
4
  from config.settings import AppConfig
5
  from api.routers.metrics import metrics_asgi_app
6
- from infra.system_metrics import log_system_metrics
7
  from api.routers import camera_stream
8
  from api.routers import dashboard_stream
9
  from api.routers import health
@@ -23,29 +22,39 @@ async def lifespan(app: FastAPI):
23
  """
24
  This is on_event("startup") new alternative, Make sure you load models here.
25
  """
26
-
27
  settings = AppConfig()
28
  logger = StructLogger(settings=settings)
29
  # Using this way to can store data. it is acts as a dict which holds instances
30
  app.state.logger = logger
31
  app.state.settings = settings
32
  app.state.mlflow_run_id = parent_run.info.run_id
33
-
34
  logger.info("Starting Server.... ")
35
- # asyncio.create_task(log_system_metrics(logger, logger_interval_sec=settings.intervals.system_metrics_seconds))
36
-
37
- detection_model_path = hf_fetch_model(repo_id="Ultralytics/YOLO26", filename=settings.yolo.model_name)
 
 
38
  app.state.detection_model = YOLO_Detector(detection_model_path)
39
 
40
- depth_model_path = hf_fetch_model(repo_id="depth-anything/Depth-Anything-V2-Small", filename=settings.depth.model_name)
41
- app.state.depth_model = DepthAnything(encoder=settings.depth.encoder, depth_model_path=depth_model_path, DEVICE=settings.depth.device)
 
 
 
 
 
 
 
42
 
43
- safety_detection_path = hf_fetch_model(repo_id="e1250/safety_detection", filename=settings.security_detector.model_name)
 
 
44
  app.state.safety_detection_model = YOLO_Detector(safety_detection_path)
45
 
46
-
47
  app.state.redis = aioredis.from_url(settings.redis_url, decode_responses=True)
48
- # Checking connection to redis - TODO add to health check
49
  try:
50
  await app.state.redis.ping()
51
  logger.info("Redis connected successfully...")
@@ -60,8 +69,9 @@ async def lifespan(app: FastAPI):
60
  torch.cuda.empty_cache()
61
  await app.state.redis.close()
62
 
 
63
  # MLFlow setup
64
- dagshub.init(repo_owner='eslam760000', repo_name='p-tracking_system', mlflow=True)
65
  mlflow.set_tracking_uri("sqlite:///config/logs/mlflow.db")
66
  mlflow.set_experiment("realtime-detection-system")
67
  parent_run = mlflow.start_run(run_name="server_session")
@@ -71,20 +81,20 @@ app = FastAPI(
71
  title="Tracking System Backend",
72
  description="real-time frame processing API",
73
  version="0.1.0",
74
- lifespan=lifespan
75
- )
76
-
77
- app.add_middleware(
78
- TrustedHostMiddleware,
79
- allowed_hosts=["*"]
80
  )
81
 
 
 
82
  # Routes
83
- app.mount("/metrics", metrics_asgi_app) # Starting Prometheus server attached to my server.
 
 
84
  app.include_router(camera_stream.router, prefix="/detectors")
85
  app.include_router(dashboard_stream.router, prefix="/dashboard")
86
  app.include_router(health.router, prefix="/health")
87
 
 
88
  @app.get("/")
89
  async def root():
90
- return {"status": "Real-Time tracker backend is running..."}
 
3
  from ai.detectors.yolo_detector import YOLO_Detector
4
  from config.settings import AppConfig
5
  from api.routers.metrics import metrics_asgi_app
 
6
  from api.routers import camera_stream
7
  from api.routers import dashboard_stream
8
  from api.routers import health
 
22
  """
23
  This is on_event("startup") new alternative, Make sure you load models here.
24
  """
25
+
26
  settings = AppConfig()
27
  logger = StructLogger(settings=settings)
28
  # Using this way to can store data. it is acts as a dict which holds instances
29
  app.state.logger = logger
30
  app.state.settings = settings
31
  app.state.mlflow_run_id = parent_run.info.run_id
32
+
33
  logger.info("Starting Server.... ")
34
+ # asyncio.create_task(log_system_metrics(logger, logger_interval_sec=settings.intervals.system_metrics_seconds))
35
+
36
+ detection_model_path = hf_fetch_model(
37
+ repo_id="Ultralytics/YOLO26", filename=settings.yolo.model_name
38
+ )
39
  app.state.detection_model = YOLO_Detector(detection_model_path)
40
 
41
+ depth_model_path = hf_fetch_model(
42
+ repo_id="depth-anything/Depth-Anything-V2-Small",
43
+ filename=settings.depth.model_name,
44
+ )
45
+ app.state.depth_model = DepthAnything(
46
+ encoder=settings.depth.encoder,
47
+ depth_model_path=depth_model_path,
48
+ DEVICE=settings.depth.device,
49
+ )
50
 
51
+ safety_detection_path = hf_fetch_model(
52
+ repo_id="e1250/safety_detection", filename=settings.security_detector.model_name
53
+ )
54
  app.state.safety_detection_model = YOLO_Detector(safety_detection_path)
55
 
 
56
  app.state.redis = aioredis.from_url(settings.redis_url, decode_responses=True)
57
+ # Checking connection to redis - TODO add to health check
58
  try:
59
  await app.state.redis.ping()
60
  logger.info("Redis connected successfully...")
 
69
  torch.cuda.empty_cache()
70
  await app.state.redis.close()
71
 
72
+
73
  # MLFlow setup
74
+ dagshub.init(repo_owner="eslam760000", repo_name="p-tracking_system", mlflow=True)
75
  mlflow.set_tracking_uri("sqlite:///config/logs/mlflow.db")
76
  mlflow.set_experiment("realtime-detection-system")
77
  parent_run = mlflow.start_run(run_name="server_session")
 
81
  title="Tracking System Backend",
82
  description="real-time frame processing API",
83
  version="0.1.0",
84
+ lifespan=lifespan,
 
 
 
 
 
85
  )
86
 
87
+ app.add_middleware(TrustedHostMiddleware, allowed_hosts=["*"])
88
+
89
  # Routes
90
+ app.mount(
91
+ "/metrics", metrics_asgi_app
92
+ ) # Starting Prometheus server attached to my server.
93
  app.include_router(camera_stream.router, prefix="/detectors")
94
  app.include_router(dashboard_stream.router, prefix="/dashboard")
95
  app.include_router(health.router, prefix="/health")
96
 
97
+
98
  @app.get("/")
99
  async def root():
100
+ return {"status": "Real-Time tracker backend is running..."}
utils/experiment.py CHANGED
@@ -2,11 +2,14 @@ from backend.config.settings import AppConfig
2
  import mlflow
3
 
4
  config = AppConfig()
 
 
5
  def log_config():
6
  mlflow.log_param("Detector", config.yolo.model_name)
7
  mlflow.log_param("Safety Model", config.security_detector.model_name)
8
  mlflow.log_param("Depth Model", config.depth.model_name)
9
 
10
- def log_metrics(metrics:dict):
 
11
  for k, v in metrics.items():
12
- mlflow.log_metric(k, v)
 
2
  import mlflow
3
 
4
  config = AppConfig()
5
+
6
+
7
  def log_config():
8
  mlflow.log_param("Detector", config.yolo.model_name)
9
  mlflow.log_param("Safety Model", config.security_detector.model_name)
10
  mlflow.log_param("Depth Model", config.depth.model_name)
11
 
12
+
13
+ def log_metrics(metrics: dict):
14
  for k, v in metrics.items():
15
+ mlflow.log_metric(k, v)