import json
from collections import Counter

import chromadb


# Name of the ChromaDB collection for the indexed video-analysis documents.
# NOTE(review): this has the same value as the default `collection_name`
# argument of index_analysis_data(), but nothing in the visible code reads
# this constant -- confirm whether other modules import it.
COLLECTION_NAME = 'video_analysis_data'


def generate_text_summary(record):
    """Build a natural-language description of one detection record.

    Args:
        record: Mapping with the keys ``'video_id'``, ``'timestamp_sec'`` and
            ``'detections'``. Every entry of ``'detections'`` must itself be
            a mapping with a ``'label'`` key.

    Returns:
        A single string that names the video and timestamp, followed either
        by the number of detections per label or, when ``'detections'`` is
        empty, by a note that no objects were detected.

    Raises:
        KeyError: If ``record`` or one of its detections lacks a required key.
    """
    video_id = record['video_id']
    timestamp = record['timestamp_sec']
    detections = record['detections']

    prefix = f"Analysis of video '{video_id}' at {timestamp} seconds:"

    if not detections:
        return f"{prefix} No objects were detected in this frame."

    # Counter is a dict subclass, so labels are reported in the order in
    # which they first appear in the detections list.
    label_counts = Counter(det['label'] for det in detections)

    object_descriptions = ", ".join(
        f"{count} instances of '{label}'"
        for label, count in label_counts.items()
    )
    return f"{prefix} Detected objects include: {object_descriptions}."


def index_analysis_data(json_file='raw_analysis.json',
                        collection_name='video_analysis_data',
                        db_path='./chroma_db'):
    """Load raw analysis records, summarize them, and index them in ChromaDB.

    Each record of the JSON file is turned into a text document with
    ``generate_text_summary`` and stored together with its ``video_id``,
    ``timestamp_sec`` and ``frame_id`` as metadata. Document ids are
    ``doc_<position>``, where the position is the record's index in the file.

    Args:
        json_file: Path of the JSON file holding the list of analysis records.
        collection_name: Name of the ChromaDB collection to write to; it is
            created when it does not exist yet.
        db_path: Directory of the persistent ChromaDB store.

    Returns:
        None. Progress and errors are reported with ``print``. When
        ``json_file`` does not exist, a message is printed and the function
        returns without touching the database.

    Raises:
        KeyError: If a record lacks ``'video_id'``, ``'timestamp_sec'`` or
            ``'frame_id'``.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding.
        with open(json_file, 'r', encoding='utf-8') as f:
            raw_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {json_file} not found. Run 'video_analyzer.py' first.")
        return

    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_or_create_collection(name=collection_name)

    documents = []
    metadatas = []
    ids = []

    print(f"Indexing {len(raw_data)} analysis records...")

    for i, record in enumerate(raw_data):
        doc_text = generate_text_summary(record)
        if not doc_text:
            continue
        documents.append(doc_text)
        metadatas.append({
            'video_id': record['video_id'],
            'timestamp_sec': record['timestamp_sec'],
            'frame_id': record['frame_id'],
        })
        ids.append(f"doc_{i}")

    if not documents:
        print("No valid documents generated for indexing.")
        return

    # The ids are deterministic (doc_0, doc_1, ...), so a second run over the
    # same persistent store reuses them. upsert() replaces the stored entries
    # instead of leaving the earlier ones in place.
    collection.upsert(
        documents=documents,
        metadatas=metadatas,
        ids=ids,
    )
    print(f"Successfully indexed {len(documents)} documents into ChromaDB collection '{collection_name}'.")


# Script entry point: index the default JSON file into the default collection.
if __name__ == '__main__':
    index_analysis_data()