Madras1 commited on
Commit
e7f9d7f
·
verified ·
1 Parent(s): 29119eb

Upload 45 files

Browse files
Files changed (45) hide show
  1. Dockerfile +24 -0
  2. README.md +39 -11
  3. app/__init__.py +1 -0
  4. app/__pycache__/__init__.cpython-311.pyc +0 -0
  5. app/__pycache__/config.cpython-311.pyc +0 -0
  6. app/__pycache__/main.cpython-311.pyc +0 -0
  7. app/api/__init__.py +1 -0
  8. app/api/__pycache__/__init__.cpython-311.pyc +0 -0
  9. app/api/routes/__init__.py +2 -0
  10. app/api/routes/__pycache__/__init__.cpython-311.pyc +0 -0
  11. app/api/routes/__pycache__/entities.cpython-311.pyc +0 -0
  12. app/api/routes/__pycache__/events.cpython-311.pyc +0 -0
  13. app/api/routes/__pycache__/ingest.cpython-311.pyc +0 -0
  14. app/api/routes/__pycache__/relationships.cpython-311.pyc +0 -0
  15. app/api/routes/__pycache__/search.cpython-311.pyc +0 -0
  16. app/api/routes/entities.py +163 -0
  17. app/api/routes/events.py +116 -0
  18. app/api/routes/ingest.py +163 -0
  19. app/api/routes/relationships.py +71 -0
  20. app/api/routes/search.py +133 -0
  21. app/config.py +37 -0
  22. app/core/__init__.py +2 -0
  23. app/core/__pycache__/__init__.cpython-311.pyc +0 -0
  24. app/core/__pycache__/database.cpython-311.pyc +0 -0
  25. app/core/database.py +38 -0
  26. app/main.py +90 -0
  27. app/models/__init__.py +2 -0
  28. app/models/__pycache__/__init__.cpython-311.pyc +0 -0
  29. app/models/__pycache__/entity.cpython-311.pyc +0 -0
  30. app/models/entity.py +135 -0
  31. app/schemas/__init__.py +10 -0
  32. app/schemas/__pycache__/__init__.cpython-311.pyc +0 -0
  33. app/schemas/__pycache__/schemas.cpython-311.pyc +0 -0
  34. app/schemas/schemas.py +163 -0
  35. app/services/__init__.py +1 -0
  36. app/services/__pycache__/__init__.cpython-311.pyc +0 -0
  37. app/services/analysis/__init__.py +1 -0
  38. app/services/ingestion/__init__.py +3 -0
  39. app/services/ingestion/__pycache__/__init__.cpython-311.pyc +0 -0
  40. app/services/ingestion/__pycache__/news.cpython-311.pyc +0 -0
  41. app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc +0 -0
  42. app/services/ingestion/news.py +86 -0
  43. app/services/ingestion/wikipedia.py +186 -0
  44. data/numidium.db +0 -0
  45. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies (gcc is needed to build wheels without
# prebuilt binaries on slim images)
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching (code changes do not
# invalidate the pip-install layer)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create data directory for SQLite (app.core.database stores the DB file here)
RUN mkdir -p /app/data

# Expose port (HF Spaces uses 7860)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,39 @@
1
- ---
2
- title: Numidium
3
- emoji: 🐢
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Numidium Backend
2
+
3
+ ## Plataforma de Inteligência
4
+
5
+ Backend do sistema Numidium/VANTAGE - uma plataforma de inteligência e análise de dados.
6
+
7
+ ## Stack
8
+
9
+ - **FastAPI** - API REST
10
+ - **SQLite** - Banco de dados
11
+ - **BeautifulSoup** - Web scraping
12
+ - **Requests** - HTTP client
13
+
14
+ ## Deploy
15
+
16
+ Hospedado no Hugging Face Spaces com Docker.
17
+
18
+ ## Rodando Localmente
19
+
20
+ ```bash
21
+ pip install -r requirements.txt
22
+ uvicorn app.main:app --reload
23
+ ```
24
+
25
+ ## Estrutura
26
+
27
+ ```
28
+ backend/
29
+ ├── app/
30
+ │ ├── main.py # FastAPI app
31
+ │ ├── config.py # Configurações
32
+ │ ├── api/routes/ # Endpoints
33
+ │ ├── core/ # Database, utils
34
+ │ ├── models/ # SQLAlchemy models
35
+ │ ├── schemas/ # Pydantic schemas
36
+ │ └── services/ # Business logic
37
+ ├── Dockerfile
38
+ └── requirements.txt
39
+ ```
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Numidium Backend App
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (158 Bytes). View file
 
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.59 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.57 kB). View file
 
app/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # API module
app/api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (162 Bytes). View file
 
app/api/routes/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # API Routes module
2
+ from app.api.routes import entities, relationships, events, search, ingest
app/api/routes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (329 Bytes). View file
 
app/api/routes/__pycache__/entities.cpython-311.pyc ADDED
Binary file (9.78 kB). View file
 
app/api/routes/__pycache__/events.cpython-311.pyc ADDED
Binary file (7.17 kB). View file
 
app/api/routes/__pycache__/ingest.cpython-311.pyc ADDED
Binary file (7.39 kB). View file
 
app/api/routes/__pycache__/relationships.cpython-311.pyc ADDED
Binary file (5.03 kB). View file
 
app/api/routes/__pycache__/search.cpython-311.pyc ADDED
Binary file (7.2 kB). View file
 
app/api/routes/entities.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entity CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_
7
+ from typing import List, Optional
8
+
9
+ from app.core.database import get_db
10
+ from app.models import Entity, Relationship
11
+ from app.schemas import EntityCreate, EntityUpdate, EntityResponse, GraphData, GraphNode, GraphEdge
12
+
13
+ router = APIRouter(prefix="/entities", tags=["Entities"])
14
+
15
+
16
@router.get("/", response_model=List[EntityResponse])
def list_entities(
    type: Optional[str] = None,
    search: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    offset: int = 0,
    db: Session = Depends(get_db)
):
    """List entities, optionally narrowed by type and/or a free-text search."""
    conditions = []
    if type:
        conditions.append(Entity.type == type)
    if search:
        pattern = f"%{search}%"
        conditions.append(
            or_(Entity.name.ilike(pattern), Entity.description.ilike(pattern))
        )

    query = db.query(Entity)
    for condition in conditions:
        query = query.filter(condition)

    # Newest entities first, then paginate.
    return (
        query.order_by(Entity.created_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
40
+
41
+
42
@router.get("/types")
def get_entity_types(db: Session = Depends(get_db)):
    """Return every distinct entity type present in the database."""
    return [row[0] for row in db.query(Entity.type).distinct().all()]
47
+
48
+
49
@router.get("/{entity_id}", response_model=EntityResponse)
def get_entity(entity_id: str, db: Session = Depends(get_db)):
    """Fetch a single entity by its ID; 404 when it does not exist."""
    found = db.query(Entity).filter(Entity.id == entity_id).first()
    if found is None:
        raise HTTPException(status_code=404, detail="Entity not found")
    return found
56
+
57
+
58
@router.post("/", response_model=EntityResponse, status_code=201)
def create_entity(entity: EntityCreate, db: Session = Depends(get_db)):
    """Persist a new entity and return it with server-generated fields."""
    record = Entity(**entity.model_dump())
    db.add(record)
    db.commit()
    # Reload to pick up DB-side defaults (id, timestamps).
    db.refresh(record)
    return record
66
+
67
+
68
@router.put("/{entity_id}", response_model=EntityResponse)
def update_entity(entity_id: str, entity: EntityUpdate, db: Session = Depends(get_db)):
    """Apply a partial update to an existing entity; 404 when missing."""
    record = db.query(Entity).filter(Entity.id == entity_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Entity not found")

    # Only fields the client actually sent are overwritten.
    for field, value in entity.model_dump(exclude_unset=True).items():
        setattr(record, field, value)

    db.commit()
    db.refresh(record)
    return record
82
+
83
+
84
@router.delete("/{entity_id}")
def delete_entity(entity_id: str, db: Session = Depends(get_db)):
    """Delete an entity together with every relationship touching it."""
    record = db.query(Entity).filter(Entity.id == entity_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Entity not found")

    # Remove relationships where the entity is either endpoint, so no
    # dangling edges survive the delete.
    db.query(Relationship).filter(
        or_(
            Relationship.source_id == entity_id,
            Relationship.target_id == entity_id,
        )
    ).delete()

    db.delete(record)
    db.commit()
    return {"message": "Entity deleted"}
102
+
103
+
104
@router.get("/{entity_id}/connections", response_model=GraphData)
def get_entity_connections(
    entity_id: str,
    depth: int = Query(default=1, le=3),
    db: Session = Depends(get_db)
):
    """
    Return the connection graph of an entity up to ``depth`` hops.

    Used by the frontend network visualisation.

    Fixes over the naive traversal:
    - each relationship is emitted at most once (an edge A->B was
      previously appended twice: once while visiting A's outgoing side
      and once while visiting B's incoming side);
    - edges whose endpoints were never materialised as nodes (neighbors
      just beyond the depth limit) are dropped, so the client never
      receives a dangling edge.
    """
    entity = db.query(Entity).filter(Entity.id == entity_id).first()
    if not entity:
        raise HTTPException(status_code=404, detail="Entity not found")

    nodes = {}
    # Keyed by (source_id, target_id, type) to deduplicate edges seen
    # from both endpoints.
    edges_by_key = {}
    visited = set()

    def explore(eid: str, current_depth: int):
        if current_depth > depth or eid in visited:
            return
        visited.add(eid)

        e = db.query(Entity).filter(Entity.id == eid).first()
        if not e:
            return

        nodes[e.id] = GraphNode(
            id=e.id,
            type=e.type,
            name=e.name,
            properties=e.properties or {}
        )

        # Outgoing relationships
        for rel in db.query(Relationship).filter(Relationship.source_id == eid).all():
            edges_by_key.setdefault(
                (rel.source_id, rel.target_id, rel.type),
                GraphEdge(
                    source=rel.source_id,
                    target=rel.target_id,
                    type=rel.type,
                    confidence=rel.confidence
                )
            )
            explore(rel.target_id, current_depth + 1)

        # Incoming relationships
        for rel in db.query(Relationship).filter(Relationship.target_id == eid).all():
            edges_by_key.setdefault(
                (rel.source_id, rel.target_id, rel.type),
                GraphEdge(
                    source=rel.source_id,
                    target=rel.target_id,
                    type=rel.type,
                    confidence=rel.confidence
                )
            )
            explore(rel.source_id, current_depth + 1)

    explore(entity_id, 0)

    # Keep only edges whose both endpoints are part of the returned graph.
    edges = [
        edge for (src, tgt, _), edge in edges_by_key.items()
        if src in nodes and tgt in nodes
    ]

    return GraphData(nodes=list(nodes.values()), edges=edges)
app/api/routes/events.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Events CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_
7
+ from typing import List, Optional
8
+ from datetime import datetime
9
+
10
+ from app.core.database import get_db
11
+ from app.models import Event
12
+ from app.schemas import EventCreate, EventResponse
13
+
14
+ router = APIRouter(prefix="/events", tags=["Events"])
15
+
16
+
17
@router.get("/", response_model=List[EventResponse])
def list_events(
    type: Optional[str] = None,
    search: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    limit: int = Query(default=50, le=200),
    offset: int = 0,
    db: Session = Depends(get_db)
):
    """List events, optionally filtered by type, text and a date window."""
    query = db.query(Event)

    if type:
        query = query.filter(Event.type == type)
    if search:
        pattern = f"%{search}%"
        query = query.filter(
            or_(Event.title.ilike(pattern), Event.description.ilike(pattern))
        )
    if start_date:
        query = query.filter(Event.event_date >= start_date)
    if end_date:
        query = query.filter(Event.event_date <= end_date)

    # Most recent first; undated events sort last.
    ordered = query.order_by(Event.event_date.desc().nullslast())
    return ordered.offset(offset).limit(limit).all()
48
+
49
+
50
@router.get("/types")
def get_event_types(db: Session = Depends(get_db)):
    """Return every distinct event type present in the database."""
    return [row[0] for row in db.query(Event.type).distinct().all()]
55
+
56
+
57
@router.get("/timeline")
def get_timeline(
    entity_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    db: Session = Depends(get_db)
):
    """
    Return dated events as a chronological timeline payload.

    Useful for the frontend timeline view; events without a date are
    excluded.
    """
    query = db.query(Event).filter(Event.event_date.isnot(None))

    if entity_id:
        # Restrict to events that reference this entity.
        # NOTE(review): JSON containment here is a simple implementation;
        # for production use a proper JSON query.
        query = query.filter(Event.entity_ids.contains([entity_id]))

    timeline = []
    for e in query.order_by(Event.event_date.asc()).limit(limit).all():
        timeline.append({
            "id": e.id,
            "title": e.title,
            "date": e.event_date.isoformat() if e.event_date else None,
            "type": e.type,
            "location": e.location_name,
        })
    return timeline
86
+
87
+
88
@router.get("/{event_id}", response_model=EventResponse)
def get_event(event_id: str, db: Session = Depends(get_db)):
    """Fetch a single event by ID; 404 when it does not exist."""
    found = db.query(Event).filter(Event.id == event_id).first()
    if found is None:
        raise HTTPException(status_code=404, detail="Event not found")
    return found
95
+
96
+
97
@router.post("/", response_model=EventResponse, status_code=201)
def create_event(event: EventCreate, db: Session = Depends(get_db)):
    """Persist a new event and return it with server-generated fields."""
    record = Event(**event.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
105
+
106
+
107
@router.delete("/{event_id}")
def delete_event(event_id: str, db: Session = Depends(get_db)):
    """Delete an event by ID; 404 when it does not exist."""
    record = db.query(Event).filter(Event.id == event_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Event not found")

    db.delete(record)
    db.commit()
    return {"message": "Event deleted"}
app/api/routes/ingest.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Ingestion Routes
3
+ Endpoints para importar dados de fontes externas
4
+ """
5
+ from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
6
+ from sqlalchemy.orm import Session
7
+ from typing import Optional, List
8
+
9
+ from app.core.database import get_db
10
+ from app.models import Entity, Document
11
+ from app.schemas import EntityResponse, DocumentResponse
12
+ from app.services.ingestion import wikipedia_scraper, news_service
13
+
14
+ router = APIRouter(prefix="/ingest", tags=["Data Ingestion"])
15
+
16
+
17
+ # ========== Wikipedia ==========
18
+
19
@router.get("/wikipedia/search")
def search_wikipedia(q: str, limit: int = 10):
    """Search Wikipedia articles matching the query string."""
    return wikipedia_scraper.search(q, limit)
24
+
25
+
26
@router.post("/wikipedia/entity", response_model=EntityResponse)
def import_from_wikipedia(
    title: str,
    entity_type: str = "person",
    db: Session = Depends(get_db)
):
    """
    Import an entity from Wikipedia.

    entity_type: person, organization, location
    """
    # Idempotency: return the entity if this article was imported before.
    existing = db.query(Entity).filter(
        Entity.name == title,
        Entity.source == "wikipedia"
    ).first()
    if existing:
        return existing

    # Dispatch to the scraper for the requested type; unknown types fall
    # back to the person scraper (historical behaviour).
    scrapers = {
        "person": wikipedia_scraper.scrape_person,
        "organization": wikipedia_scraper.scrape_organization,
        "location": wikipedia_scraper.scrape_location,
    }
    scrape = scrapers.get(entity_type, wikipedia_scraper.scrape_person)
    data = scrape(title)

    if not data:
        raise HTTPException(status_code=404, detail="Article not found on Wikipedia")

    entity = Entity(**data)
    db.add(entity)
    db.commit()
    db.refresh(entity)
    return entity
65
+
66
+
67
+ # ========== News ==========
68
+
69
@router.get("/news/feeds")
def list_available_feeds():
    """List the names of the configured RSS news feeds."""
    return [name for name in news_service.RSS_FEEDS]
73
+
74
+
75
@router.get("/news/fetch")
def fetch_news(feed: Optional[str] = None):
    """
    Fetch articles from the RSS feeds.

    When no feed is given, every configured feed is fetched.
    """
    if not feed:
        return news_service.fetch_all_feeds()

    if feed not in news_service.RSS_FEEDS:
        raise HTTPException(status_code=404, detail="Feed not found")
    return news_service.fetch_feed(news_service.RSS_FEEDS[feed])
90
+
91
+
92
@router.get("/news/search")
def search_news(q: str):
    """Keyword search for news via Google News."""
    results = news_service.search_news(q)
    return results
96
+
97
+
98
@router.post("/news/import")
def import_news(
    query: Optional[str] = None,
    feed: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """
    Import news articles into the system as documents.

    Source priority: keyword query, then a single named feed, then all
    configured feeds. Articles whose URL already exists are skipped.
    """
    if query:
        articles = news_service.search_news(query)
    elif feed:
        if feed not in news_service.RSS_FEEDS:
            raise HTTPException(status_code=404, detail="Feed not found")
        articles = news_service.fetch_feed(news_service.RSS_FEEDS[feed])
    else:
        articles = news_service.fetch_all_feeds()

    imported = 0
    for article in articles:
        # Deduplicate by source URL when the article has one.
        url = article.get("url")
        if url and db.query(Document).filter(Document.source_url == url).first():
            continue

        db.add(Document(**news_service.to_document(article)))
        imported += 1

    db.commit()

    return {"message": f"Imported {imported} articles", "total_found": len(articles)}
134
+
135
+
136
+ # ========== Manual Import ==========
137
+
138
@router.post("/bulk/entities")
def bulk_import_entities(
    entities: List[dict],
    db: Session = Depends(get_db)
):
    """
    Import several entities in one request.

    Handy for CSV/JSON loads; missing fields fall back to safe defaults.
    """
    imported = 0
    for payload in entities:
        record = Entity(
            type=payload.get("type", "unknown"),
            name=payload.get("name", "Unnamed"),
            description=payload.get("description"),
            properties=payload.get("properties", {}),
            latitude=payload.get("latitude"),
            longitude=payload.get("longitude"),
            source=payload.get("source", "manual"),
        )
        db.add(record)
        imported += 1

    db.commit()

    return {"message": f"Imported {imported} entities"}
app/api/routes/relationships.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Relationship CRUD Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from sqlalchemy.orm import Session
6
+ from typing import List, Optional
7
+
8
+ from app.core.database import get_db
9
+ from app.models import Relationship, Entity
10
+ from app.schemas import RelationshipCreate, RelationshipResponse
11
+
12
+ router = APIRouter(prefix="/relationships", tags=["Relationships"])
13
+
14
+
15
@router.get("/", response_model=List[RelationshipResponse])
def list_relationships(
    type: Optional[str] = None,
    source_id: Optional[str] = None,
    target_id: Optional[str] = None,
    limit: int = Query(default=50, le=200),
    db: Session = Depends(get_db)
):
    """List relationships with optional type/source/target filters."""
    query = db.query(Relationship)

    # Apply each provided filter; absent filters leave the query untouched.
    criteria = (
        (type, Relationship.type),
        (source_id, Relationship.source_id),
        (target_id, Relationship.target_id),
    )
    for value, column in criteria:
        if value:
            query = query.filter(column == value)

    return query.limit(limit).all()
34
+
35
+
36
@router.get("/types")
def get_relationship_types(db: Session = Depends(get_db)):
    """Return every distinct relationship type present in the database."""
    return [row[0] for row in db.query(Relationship.type).distinct().all()]
41
+
42
+
43
@router.post("/", response_model=RelationshipResponse, status_code=201)
def create_relationship(rel: RelationshipCreate, db: Session = Depends(get_db)):
    """Create a relationship between two entities; both must already exist."""
    # Validate both endpoints before inserting the edge.
    if db.query(Entity).filter(Entity.id == rel.source_id).first() is None:
        raise HTTPException(status_code=404, detail="Source entity not found")
    if db.query(Entity).filter(Entity.id == rel.target_id).first() is None:
        raise HTTPException(status_code=404, detail="Target entity not found")

    record = Relationship(**rel.model_dump())
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
60
+
61
+
62
@router.delete("/{relationship_id}")
def delete_relationship(relationship_id: str, db: Session = Depends(get_db)):
    """Delete a relationship by ID; 404 when it does not exist."""
    record = db.query(Relationship).filter(Relationship.id == relationship_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Relationship not found")

    db.delete(record)
    db.commit()
    return {"message": "Relationship deleted"}
app/api/routes/search.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Search and Analytics Routes
3
+ """
4
+ from fastapi import APIRouter, Depends, Query
5
+ from sqlalchemy.orm import Session
6
+ from sqlalchemy import or_, func
7
+ from typing import Optional, List
8
+
9
+ from app.core.database import get_db
10
+ from app.models import Entity, Relationship, Event, Document
11
+ from app.schemas import SearchResult, SystemStats, EntityResponse, EventResponse, DocumentResponse
12
+
13
+ router = APIRouter(prefix="/search", tags=["Search"])
14
+
15
+
16
@router.get("/", response_model=SearchResult)
def global_search(
    q: str = Query(..., min_length=2, description="Search query"),
    types: Optional[str] = Query(None, description="Entity types (comma-separated)"),
    limit: int = Query(default=20, le=100),
    db: Session = Depends(get_db)
):
    """
    Global search across entities, events and documents.

    ``types`` narrows the entity results only. Fix: tokens are stripped
    and empty tokens dropped, so "person, organization" (with a space
    after the comma) now matches — previously the raw token
    " organization" silently matched nothing.
    """
    search_term = f"%{q}%"

    type_filter = None
    if types:
        cleaned = [t.strip() for t in types.split(",") if t.strip()]
        type_filter = cleaned or None

    # Search entities
    entity_query = db.query(Entity).filter(
        or_(
            Entity.name.ilike(search_term),
            Entity.description.ilike(search_term)
        )
    )
    if type_filter:
        entity_query = entity_query.filter(Entity.type.in_(type_filter))
    entities = entity_query.limit(limit).all()

    # Search events
    events = db.query(Event).filter(
        or_(
            Event.title.ilike(search_term),
            Event.description.ilike(search_term)
        )
    ).limit(limit).all()

    # Search documents
    documents = db.query(Document).filter(
        or_(
            Document.title.ilike(search_term),
            Document.content.ilike(search_term)
        )
    ).limit(limit).all()

    return SearchResult(
        entities=entities,
        events=events,
        documents=documents
    )
61
+
62
+
63
@router.get("/stats", response_model=SystemStats)
def get_system_stats(db: Session = Depends(get_db)):
    """
    Return system-wide statistics for the VANTAGE dashboard.
    """
    # Row counts for each core table.
    counts = {
        model: db.query(model).count()
        for model in (Entity, Relationship, Event, Document)
    }

    # Breakdown of entities per type.
    entities_by_type = dict(
        db.query(Entity.type, func.count(Entity.id)).group_by(Entity.type).all()
    )

    # Activity feed: the ten most recently created entities.
    recent_activity = [
        {
            "id": e.id,
            "type": e.type,
            "name": e.name,
            "created_at": e.created_at.isoformat()
        }
        for e in db.query(Entity).order_by(Entity.created_at.desc()).limit(10).all()
    ]

    return SystemStats(
        total_entities=counts[Entity],
        total_relationships=counts[Relationship],
        total_events=counts[Event],
        total_documents=counts[Document],
        entities_by_type=entities_by_type,
        recent_activity=recent_activity
    )
102
+
103
+
104
@router.get("/geo")
def get_geo_data(
    entity_type: Optional[str] = None,
    db: Session = Depends(get_db)
):
    """
    Return geolocated entities for the map view.

    Only entities with both latitude and longitude set are included.
    """
    query = db.query(Entity).filter(
        Entity.latitude.isnot(None),
        Entity.longitude.isnot(None)
    )
    if entity_type:
        query = query.filter(Entity.type == entity_type)

    points = []
    for e in query.all():
        points.append({
            "id": e.id,
            "type": e.type,
            "name": e.name,
            "lat": e.latitude,
            "lng": e.longitude,
            "properties": e.properties
        })
    return points
app/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Numidium Backend Configuration
3
+ """
4
+ from pydantic_settings import BaseSettings
5
+ from functools import lru_cache
6
+ import os
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings, loaded from the environment and an optional .env file."""

    # App Info
    app_name: str = "Numidium"
    app_version: str = "0.1.0"
    debug: bool = False

    # Database — SQLite file under ./data (directory created by app.core.database)
    database_url: str = "sqlite:///./data/numidium.db"

    # External APIs (optional — can be configured later via environment)
    newsapi_key: str = ""

    # CORS — NOTE(review): "*" allows any origin; tighten for production
    cors_origins: list[str] = ["*"]

    class Config:
        # Values may be overridden by a local .env file
        env_file = ".env"
        env_file_encoding = "utf-8"
29
+
30
+
31
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide Settings instance (constructed once, then cached)."""
    return Settings()


# Module-level singleton for convenient `from app.config import settings`.
settings = get_settings()
app/core/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Core module
2
+ from app.core.database import get_db, init_db, Base
app/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (270 Bytes). View file
 
app/core/__pycache__/database.cpython-311.pyc ADDED
Binary file (1.54 kB). View file
 
app/core/database.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Database configuration and session management
"""
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import os

from app.config import settings

# Ensure data directory exists (the SQLite file configured in
# settings.database_url lives under ./data)
os.makedirs("data", exist_ok=True)

# Create engine.
# check_same_thread=False is required for SQLite because the web server
# may service a request on a different thread than the one that opened
# the connection.
engine = create_engine(
    settings.database_url,
    connect_args={"check_same_thread": False}  # Needed for SQLite
)

# Session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Base class for models
Base = declarative_base()


def get_db():
    """FastAPI dependency: yield a session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def init_db():
    """Create all tables registered on Base (no-op for tables that exist)."""
    Base.metadata.create_all(bind=engine)
app/main.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Numidium Backend - Main Application
3
+ Plataforma de Inteligência e Análise de Dados
4
+ """
5
+ from fastapi import FastAPI
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from contextlib import asynccontextmanager
8
+
9
+ from app.config import settings
10
+ from app.core.database import init_db
11
+ from app.api.routes import entities, relationships, events, search, ingest
12
+
13
+
14
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: create DB tables on startup, log on shutdown."""
    # Startup: make sure all SQLAlchemy tables exist before serving requests.
    init_db()
    print("🚀 Numidium Backend started!")
    print(f"📊 Database: {settings.database_url}")
    yield  # the application runs while suspended here
    # Shutdown: nothing to release; just log.
    print("👋 Numidium Backend shutting down...")
24
+
25
+
26
# Create FastAPI app (the Markdown description below is rendered in /docs)
app = FastAPI(
    title="Numidium API",
    description="""
## 🔮 Sistema de Inteligência e Análise de Dados

Backend do VANTAGE - Uma plataforma para:
- 📥 Ingestão de dados de múltiplas fontes (Wikipedia, News, Manual)
- 🔗 Mapeamento de conexões entre entidades
- 🗺️ Visualização geográfica
- 📊 Análise de grafos e relacionamentos
- 🔍 Busca global
""",
    version=settings.app_version,
    lifespan=lifespan
)

# CORS middleware — origins come from settings (default "*"; see config)
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — every resource is mounted under the /api/v1 prefix
app.include_router(entities.router, prefix="/api/v1")
app.include_router(relationships.router, prefix="/api/v1")
app.include_router(events.router, prefix="/api/v1")
app.include_router(search.router, prefix="/api/v1")
app.include_router(ingest.router, prefix="/api/v1")
58
+
59
+
60
@app.get("/")
def root():
    """Root endpoint — basic API metadata."""
    info = dict(
        name="Numidium",
        version=settings.app_version,
        status="online",
        docs="/docs",
    )
    info["description"] = "Sistema de Inteligência e Análise de Dados"
    return info
70
+
71
+
72
@app.get("/health")
def health_check():
    """Liveness probe used by HF Spaces."""
    return dict(status="healthy")
76
+
77
+
78
@app.get("/api/v1")
def api_info():
    """API v1 overview: version plus the mounted endpoint roots."""
    base = "/api/v1"
    resources = ("entities", "relationships", "events", "search", "ingest")
    return {
        "version": "1.0.0",
        "endpoints": {name: f"{base}/{name}" for name in resources},
    }
app/models/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Models module
2
+ from app.models.entity import Entity, Relationship, Event, Document
app/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (303 Bytes). View file
 
app/models/__pycache__/entity.cpython-311.pyc ADDED
Binary file (6.09 kB). View file
 
app/models/entity.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLAlchemy Models for Numidium
3
+ """
4
+ from sqlalchemy import Column, String, Text, DateTime, Float, JSON, ForeignKey, Table
5
+ from sqlalchemy.orm import relationship
6
+ from datetime import datetime
7
+ import uuid
8
+
9
+ from app.core.database import Base
10
+
11
+
12
def generate_uuid():
    """Return a fresh random UUID4 as its canonical 36-char string form."""
    return f"{uuid.uuid4()}"
14
+
15
+
16
class Entity(Base):
    """
    Entity -- anything trackable in the system.

    Can be a person, organization, location, vehicle, event, document, etc.
    """
    __tablename__ = "entities"

    # 36-char UUID4 string primary key (see generate_uuid above).
    id = Column(String(36), primary_key=True, default=generate_uuid)
    type = Column(String(50), nullable=False, index=True)  # person, organization, location, etc
    name = Column(String(255), nullable=False, index=True)
    description = Column(Text, nullable=True)
    properties = Column(JSON, default=dict)  # free-form, source-specific data

    # Optional geolocation
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Data provenance
    source = Column(String(100), nullable=True)  # wikipedia, newsapi, manual, etc
    source_url = Column(Text, nullable=True)

    # Timestamps (naive UTC). NOTE(review): datetime.utcnow is deprecated in
    # Python 3.12 -- consider datetime.now(timezone.utc) on migration.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Edges where this entity is the source of the relationship...
    outgoing_relationships = relationship(
        "Relationship",
        foreign_keys="Relationship.source_id",
        back_populates="source_entity"
    )
    # ...and edges where it is the target.
    incoming_relationships = relationship(
        "Relationship",
        foreign_keys="Relationship.target_id",
        back_populates="target_entity"
    )
+
53
+
54
class Relationship(Base):
    """
    Directed relationship between two entities.

    Examples: works_for, knows, owns, located_at, participated_in
    """
    __tablename__ = "relationships"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    source_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
    target_id = Column(String(36), ForeignKey("entities.id"), nullable=False)
    type = Column(String(50), nullable=False, index=True)  # works_for, knows, owns, etc
    properties = Column(JSON, default=dict)
    confidence = Column(Float, default=1.0)  # 0-1, how certain we are of this connection

    # Data provenance
    source = Column(String(100), nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    # ORM accessors for the two endpoint entities of this edge
    source_entity = relationship("Entity", foreign_keys=[source_id], back_populates="outgoing_relationships")
    target_entity = relationship("Entity", foreign_keys=[target_id], back_populates="incoming_relationships")
77
+
78
+
79
class Event(Base):
    """
    Event -- something that happened involving one or more entities.
    """
    __tablename__ = "events"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    type = Column(String(50), nullable=False, index=True)
    title = Column(String(255), nullable=False)
    description = Column(Text, nullable=True)

    # When it happened
    event_date = Column(DateTime, nullable=True)

    # Where it happened
    location_name = Column(String(255), nullable=True)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Entities involved (stored as a JSON array of entity IDs; no FK enforced)
    entity_ids = Column(JSON, default=list)

    # Data provenance
    source = Column(String(100), nullable=True)
    source_url = Column(Text, nullable=True)

    # Free-form metadata
    properties = Column(JSON, default=dict)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
110
+
111
+
112
class Document(Base):
    """
    Document -- text/file stored for analysis.
    """
    __tablename__ = "documents"

    id = Column(String(36), primary_key=True, default=generate_uuid)
    title = Column(String(255), nullable=False)
    content = Column(Text, nullable=True)
    summary = Column(Text, nullable=True)  # AI-generated summary

    # Document kind
    doc_type = Column(String(50), default="text")  # text, news, report, etc

    # Entities mentioned in the text (extracted by NLP), as a JSON list
    mentioned_entities = Column(JSON, default=list)

    # Data provenance
    source = Column(String(100), nullable=True)
    source_url = Column(Text, nullable=True)

    # Timestamps
    published_at = Column(DateTime, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow)
app/schemas/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Schemas module
2
+ from app.schemas.schemas import (
3
+ EntityCreate, EntityUpdate, EntityResponse,
4
+ RelationshipCreate, RelationshipResponse,
5
+ EventCreate, EventResponse,
6
+ DocumentCreate, DocumentResponse,
7
+ GraphData, GraphNode, GraphEdge,
8
+ SearchQuery, SearchResult,
9
+ SystemStats
10
+ )
app/schemas/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (725 Bytes). View file
 
app/schemas/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (9.17 kB). View file
 
app/schemas/schemas.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic Schemas for API validation
3
+ """
4
+ from pydantic import BaseModel, Field
5
+ from typing import Optional, List, Any
6
+ from datetime import datetime
7
+
8
+
9
+ # ========== Entity Schemas ==========
10
+
11
class EntityBase(BaseModel):
    """Fields shared by the entity create/read schemas."""
    type: str = Field(..., description="Tipo da entidade: person, organization, location, etc")
    name: str = Field(..., description="Nome da entidade")
    description: Optional[str] = None
    properties: dict = Field(default_factory=dict)  # free-form metadata
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    source: Optional[str] = None
    source_url: Optional[str] = None
20
+
21
+
22
class EntityCreate(EntityBase):
    """Creation payload -- identical to EntityBase."""
    pass
24
+
25
+
26
class EntityUpdate(BaseModel):
    """Partial-update payload: every field is optional."""
    type: Optional[str] = None
    name: Optional[str] = None
    description: Optional[str] = None
    properties: Optional[dict] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
33
+
34
+
35
class EntityResponse(EntityBase):
    """Entity as returned by the API: base fields plus id and timestamps."""
    id: str
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
42
+
43
+
44
+ # ========== Relationship Schemas ==========
45
+
46
class RelationshipBase(BaseModel):
    """Fields shared by the relationship create/read schemas."""
    source_id: str
    target_id: str
    type: str = Field(..., description="Tipo: works_for, knows, owns, located_at, etc")
    properties: dict = Field(default_factory=dict)
    confidence: float = Field(default=1.0, ge=0, le=1)  # certainty, clamped to [0, 1]
    source: Optional[str] = None
53
+
54
+
55
class RelationshipCreate(RelationshipBase):
    """Creation payload -- identical to RelationshipBase."""
    pass
57
+
58
+
59
class RelationshipResponse(RelationshipBase):
    """Relationship as returned by the API: base fields plus id and timestamp."""
    id: str
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
65
+
66
+
67
+ # ========== Event Schemas ==========
68
+
69
class EventBase(BaseModel):
    """Fields shared by the event create/read schemas."""
    type: str
    title: str
    description: Optional[str] = None
    event_date: Optional[datetime] = None
    location_name: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    entity_ids: List[str] = Field(default_factory=list)  # IDs of involved entities
    source: Optional[str] = None
    source_url: Optional[str] = None
    properties: dict = Field(default_factory=dict)
81
+
82
+
83
class EventCreate(EventBase):
    """Creation payload -- identical to EventBase."""
    pass
85
+
86
+
87
class EventResponse(EventBase):
    """Event as returned by the API: base fields plus id and timestamp."""
    id: str
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
93
+
94
+
95
+ # ========== Document Schemas ==========
96
+
97
class DocumentBase(BaseModel):
    """Fields shared by the document create/read schemas."""
    title: str
    content: Optional[str] = None
    doc_type: str = "text"  # text, news, report, ...
    source: Optional[str] = None
    source_url: Optional[str] = None
    published_at: Optional[datetime] = None
104
+
105
+
106
class DocumentCreate(DocumentBase):
    """Creation payload -- identical to DocumentBase."""
    pass
108
+
109
+
110
class DocumentResponse(DocumentBase):
    """Document as returned by the API: base fields plus server-side data."""
    id: str
    summary: Optional[str] = None  # AI-generated summary, when available
    # Consistency fix: the rest of this module declares mutable defaults via
    # Field(default_factory=...); a bare `= []` worked (Pydantic deep-copies
    # defaults) but broke the file's convention.
    mentioned_entities: List[str] = Field(default_factory=list)
    created_at: datetime

    class Config:
        # Allow construction directly from SQLAlchemy ORM objects.
        from_attributes = True
118
+
119
+
120
+ # ========== Graph Schemas ==========
121
+
122
class GraphNode(BaseModel):
    """A node of the entity graph (mirrors an Entity)."""
    id: str
    type: str
    name: str
    # Consistency fix: use Field(default_factory=dict) like the other schemas
    # in this module instead of a bare `= {}` literal.
    properties: dict = Field(default_factory=dict)
127
+
128
+
129
class GraphEdge(BaseModel):
    """A directed edge of the entity graph (mirrors a Relationship)."""
    source: str
    target: str
    type: str
    confidence: float = 1.0
134
+
135
+
136
class GraphData(BaseModel):
    """Complete graph payload: nodes plus edges."""
    nodes: List[GraphNode]
    edges: List[GraphEdge]
139
+
140
+
141
+ # ========== Search Schemas ==========
142
+
143
class SearchQuery(BaseModel):
    """Global search request; optional filter by entity types."""
    query: str
    entity_types: Optional[List[str]] = None
    limit: int = Field(default=20, le=100)  # hard cap of 100 results
147
+
148
+
149
class SearchResult(BaseModel):
    """Global search response, grouped by record kind."""
    entities: List[EntityResponse]
    events: List[EventResponse]
    documents: List[DocumentResponse]
153
+
154
+
155
+ # ========== Stats Schemas ==========
156
+
157
class SystemStats(BaseModel):
    """Aggregate counters for the dashboard/stats endpoint."""
    total_entities: int
    total_relationships: int
    total_events: int
    total_documents: int
    entities_by_type: dict  # type name -> count
    recent_activity: List[dict]
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Services module
app/services/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (167 Bytes). View file
 
app/services/analysis/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Analysis services
app/services/ingestion/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Ingestion services
2
+ from app.services.ingestion.wikipedia import wikipedia_scraper
3
+ from app.services.ingestion.news import news_service
app/services/ingestion/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (345 Bytes). View file
 
app/services/ingestion/__pycache__/news.cpython-311.pyc ADDED
Binary file (4.54 kB). View file
 
app/services/ingestion/__pycache__/wikipedia.cpython-311.pyc ADDED
Binary file (8.01 kB). View file
 
app/services/ingestion/news.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ News API Client Service
3
+ Usa RSS feeds públicos para não precisar de API key
4
+ """
5
+ import feedparser
6
+ import requests
7
+ from typing import List, Dict
8
+ from datetime import datetime
9
+ import re
10
+
11
+
12
class NewsService:
    """Fetches news from public sources via RSS (no API key required)."""

    # Public Brazilian and international RSS feeds
    RSS_FEEDS = {
        "g1": "https://g1.globo.com/rss/g1/",
        "folha": "https://feeds.folha.uol.com.br/folha/rss/rss091.xml",
        "bbc_brasil": "https://www.bbc.com/portuguese/articles/rss.xml",
        "reuters": "https://www.reutersagency.com/feed/",
        "google_news_br": "https://news.google.com/rss?hl=pt-BR&gl=BR&ceid=BR:pt-419"
    }

    # Compiled once instead of on every _clean_html() call
    _TAG_RE = re.compile('<.*?>')

    def fetch_feed(self, feed_url: str) -> List[Dict]:
        """
        Fetch up to 20 articles from one RSS feed.

        Best-effort: any network/parse failure is logged and yields [].
        """
        try:
            feed = feedparser.parse(feed_url)
            articles = []

            for entry in feed.entries[:20]:  # cap per-feed volume
                published = None
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    published = datetime(*entry.published_parsed[:6])

                articles.append({
                    "title": entry.get("title", ""),
                    "description": self._clean_html(entry.get("summary", "")),
                    "url": entry.get("link", ""),
                    "published_at": published,
                    "source": feed.feed.get("title", "Unknown")
                })

            return articles
        except Exception as e:
            print(f"Error fetching feed {feed_url}: {e}")
            return []

    def fetch_all_feeds(self) -> List[Dict]:
        """Fetch articles from every configured feed, tagging each with its feed_name."""
        all_articles = []
        for name, url in self.RSS_FEEDS.items():
            articles = self.fetch_feed(url)
            for article in articles:
                article["feed_name"] = name
            all_articles.extend(articles)
        return all_articles

    def search_news(self, query: str) -> List[Dict]:
        """
        Search news through the Google News RSS search endpoint.
        """
        from urllib.parse import quote_plus  # local import: only needed here

        # BUG FIX: the raw query was previously interpolated into the URL
        # unencoded, so queries containing spaces or special characters
        # produced an invalid feed URL. quote_plus() percent-encodes it.
        search_url = (
            "https://news.google.com/rss/search?"
            f"q={quote_plus(query)}&hl=pt-BR&gl=BR&ceid=BR:pt-419"
        )
        return self.fetch_feed(search_url)

    def _clean_html(self, text: str) -> str:
        """Strip HTML tags from *text* (same non-greedy pattern as before)."""
        return self._TAG_RE.sub('', text)

    def to_document(self, article: Dict) -> Dict:
        """
        Convert a fetched news article into the Document creation format.
        """
        return {
            "title": article["title"],
            "content": article.get("description", ""),
            "doc_type": "news",
            "source": article.get("source", "news"),
            "source_url": article.get("url"),
            "published_at": article.get("published_at")
        }


# Singleton instance
news_service = NewsService()
app/services/ingestion/wikipedia.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Wikipedia Scraper Service
3
+ """
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from typing import Optional, Dict, List
7
+ import re
8
+
9
+
10
class WikipediaScraper:
    """Scraper that extracts structured data from the Portuguese Wikipedia."""

    BASE_URL = "https://pt.wikipedia.org"
    API_URL = "https://pt.wikipedia.org/w/api.php"
    # Seconds before a stalled HTTP request is abandoned. FIX: the original
    # requests.get calls had no timeout and could hang ingestion forever.
    REQUEST_TIMEOUT = 10

    def search(self, query: str, limit: int = 10) -> List[Dict]:
        """
        Search Wikipedia articles matching *query*.

        Returns a list of {title, snippet, pageid} dicts; the snippet has
        HTML highlighting stripped.
        """
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "srlimit": limit,
            "format": "json"
        }

        response = requests.get(self.API_URL, params=params, timeout=self.REQUEST_TIMEOUT)
        data = response.json()

        results = []
        for item in data.get("query", {}).get("search", []):
            results.append({
                "title": item["title"],
                "snippet": BeautifulSoup(item["snippet"], "html.parser").get_text(),
                "pageid": item["pageid"]
            })

        return results

    def get_article(self, title: str) -> Optional[Dict]:
        """
        Fetch full information for one article: intro extract, thumbnail,
        categories and (when present) coordinates.

        Returns None when the page does not exist.
        """
        params = {
            "action": "query",
            "titles": title,
            "prop": "extracts|pageimages|coordinates|categories",
            "exintro": True,
            "explaintext": True,
            "pithumbsize": 300,
            "format": "json"
        }

        response = requests.get(self.API_URL, params=params, timeout=self.REQUEST_TIMEOUT)
        data = response.json()

        pages = data.get("query", {}).get("pages", {})
        for page_id, page in pages.items():
            # MediaWiki reports a missing page with the sentinel id "-1".
            if page_id == "-1":
                return None

            result = {
                "title": page.get("title"),
                "extract": page.get("extract"),
                "pageid": page.get("pageid"),
                "url": f"{self.BASE_URL}/wiki/{page.get('title', '').replace(' ', '_')}",
                "thumbnail": page.get("thumbnail", {}).get("source"),
                "categories": [c["title"].replace("Categoria:", "")
                               for c in page.get("categories", [])]
            }

            # Coordinates, when the article provides them
            if "coordinates" in page:
                coords = page["coordinates"][0]
                result["latitude"] = coords.get("lat")
                result["longitude"] = coords.get("lon")

            return result

        return None

    def get_infobox(self, title: str) -> Dict:
        """
        Best-effort extraction of the article infobox as a {header: value}
        dict. Returns {} when the page has no infobox table.
        """
        url = f"{self.BASE_URL}/wiki/{title.replace(' ', '_')}"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "lxml")

        infobox = soup.find("table", class_="infobox")
        if not infobox:
            return {}

        data = {}
        for row in infobox.find_all("tr"):
            header = row.find("th")
            cell = row.find("td")
            if header and cell:
                key = header.get_text(strip=True)
                value = cell.get_text(strip=True)
                value = re.sub(r'\[\d+\]', '', value)  # strip citation markers like [1]
                data[key] = value

        return data

    def _scrape_entity(self, name: str, entity_type: str) -> Optional[Dict]:
        """
        Shared implementation behind scrape_person/organization/location:
        merges article data and infobox into an Entity-shaped dict, or None
        when the article does not exist. (The three public methods were
        previously verbatim copies differing only in the "type" value.)
        """
        article = self.get_article(name)
        if not article:
            return None

        infobox = self.get_infobox(name)

        return {
            "type": entity_type,
            "name": article["title"],
            "description": article.get("extract"),
            "source": "wikipedia",
            "source_url": article["url"],
            "properties": {
                "thumbnail": article.get("thumbnail"),
                "categories": article.get("categories", []),
                **infobox
            },
            "latitude": article.get("latitude"),
            "longitude": article.get("longitude")
        }

    def scrape_person(self, name: str) -> Optional[Dict]:
        """Scrape a person's data, formatted for creating an Entity."""
        return self._scrape_entity(name, "person")

    def scrape_organization(self, name: str) -> Optional[Dict]:
        """Scrape an organization's data, formatted for creating an Entity."""
        return self._scrape_entity(name, "organization")

    def scrape_location(self, name: str) -> Optional[Dict]:
        """Scrape a location's data, formatted for creating an Entity."""
        return self._scrape_entity(name, "location")


# Singleton instance
wikipedia_scraper = WikipediaScraper()
data/numidium.db ADDED
Binary file (53.2 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ sqlalchemy==2.0.23
4
+ pydantic==2.5.2
5
+ pydantic-settings==2.1.0
6
+ requests==2.31.0
7
+ beautifulsoup4==4.12.2
8
+ lxml==4.9.3
9
+ httpx==0.25.2
10
+ python-multipart==0.0.6
11
+ aiohttp==3.9.1
12
+ feedparser==6.0.10