Spaces:
Sleeping
Sleeping
Pablo committed on
Commit ·
cf0a8ed
1
Parent(s): a619d03
feat: APOHARA: Context Forge V5 — synthesis + rebrand complete
Browse files
Phase 1 — Merge from CC:
- uv.lock (1640 entries, full reproducibility)
- benchmark_v4/v5: honest cold/warm/off protocol + delta_pct=None
- 4 CC test files merged (277 collected, 0 failures)
Phase 2 — Rebrand:
- contextforge/ → apohara_context_forge/ (25 files)
- pyproject.toml: name=apohara-context-forge, entry=apohara
- Dockerfile, docker-compose, .env.example updated
Surgical fixes:
- AnchorPool.update_pool(): token_ids → str before encode()
- VRAMAwareCache: respect pre-set _mode when pressure=None
- BudgetManager: COT/RAG detection order before agent
- TokenCounter: self._use_fallback init
- Test guards: onnxruntime + faiss skipif decorators
Final: 277 collected · 0 failed · hermetic suite ✓
This view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +1 -1
- Dockerfile +1 -1
- README.md +11 -44
- agents/__pycache__/__init__.cpython-314.pyc +0 -0
- agents/__pycache__/base_agent.cpython-314.pyc +0 -0
- agents/__pycache__/demo_agents.cpython-314.pyc +0 -0
- agents/__pycache__/pipeline.cpython-314.pyc +0 -0
- agents/base_agent.py +1 -1
- agents/pipeline.py +6 -6
- apohara_context_forge.egg-info/PKG-INFO +30 -0
- apohara_context_forge.egg-info/SOURCES.txt +85 -0
- apohara_context_forge.egg-info/dependency_links.txt +1 -0
- apohara_context_forge.egg-info/entry_points.txt +2 -0
- apohara_context_forge.egg-info/requires.txt +25 -0
- apohara_context_forge.egg-info/top_level.txt +3 -0
- {contextforge → apohara_context_forge}/__init__.py +7 -7
- apohara_context_forge/__pycache__/__init__.cpython-312.pyc +0 -0
- apohara_context_forge/__pycache__/__init__.cpython-314.pyc +0 -0
- {contextforge → apohara_context_forge}/__pycache__/config.cpython-314.pyc +0 -0
- apohara_context_forge/__pycache__/main.cpython-314.pyc +0 -0
- apohara_context_forge/__pycache__/models.cpython-312.pyc +0 -0
- apohara_context_forge/__pycache__/models.cpython-314.pyc +0 -0
- apohara_context_forge/__pycache__/pipeline_config.cpython-312.pyc +0 -0
- apohara_context_forge/__pycache__/pipeline_config.cpython-314.pyc +0 -0
- apohara_context_forge/__pycache__/token_counter.cpython-312.pyc +0 -0
- apohara_context_forge/__pycache__/token_counter.cpython-314.pyc +0 -0
- {contextforge → apohara_context_forge}/compression/__init__.py +0 -0
- apohara_context_forge/compression/__pycache__/__init__.cpython-312.pyc +0 -0
- apohara_context_forge/compression/__pycache__/__init__.cpython-314.pyc +0 -0
- apohara_context_forge/compression/__pycache__/budget_manager.cpython-312.pyc +0 -0
- apohara_context_forge/compression/__pycache__/budget_manager.cpython-314.pyc +0 -0
- apohara_context_forge/compression/__pycache__/compressor.cpython-312.pyc +0 -0
- apohara_context_forge/compression/__pycache__/compressor.cpython-314.pyc +0 -0
- apohara_context_forge/compression/__pycache__/coordinator.cpython-314.pyc +0 -0
- {contextforge → apohara_context_forge}/compression/budget_manager.py +10 -8
- {contextforge → apohara_context_forge}/compression/compressor.py +3 -3
- {contextforge → apohara_context_forge}/compression/coordinator.py +6 -6
- {contextforge → apohara_context_forge}/config.py +0 -0
- {contextforge → apohara_context_forge}/decoding/__init__.py +1 -1
- apohara_context_forge/decoding/__pycache__/__init__.cpython-314.pyc +0 -0
- apohara_context_forge/decoding/__pycache__/speculative_coordinator.cpython-314.pyc +0 -0
- {contextforge → apohara_context_forge}/decoding/speculative_coordinator.py +1 -1
- {contextforge → apohara_context_forge}/dedup/__init__.py +0 -0
- apohara_context_forge/dedup/__pycache__/__init__.cpython-312.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/__init__.cpython-314.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/_deprecated_dedup_engine.cpython-314.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/embedder.cpython-314.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/faiss_index.cpython-312.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/faiss_index.cpython-314.pyc +0 -0
- apohara_context_forge/dedup/__pycache__/lsh_engine.cpython-312.pyc +0 -0
.env.example
CHANGED
|
@@ -3,7 +3,7 @@ VLLM_BASE_URL=http://localhost:8000
|
|
| 3 |
VLLM_MODEL=Qwen/Qwen3.6-35B-A3B
|
| 4 |
VLLM_API_KEY=contextforge-local
|
| 5 |
|
| 6 |
-
#
|
| 7 |
CONTEXTFORGE_HOST=0.0.0.0
|
| 8 |
CONTEXTFORGE_PORT=8001
|
| 9 |
CONTEXTFORGE_TTL_SECONDS=300
|
|
|
|
| 3 |
VLLM_MODEL=Qwen/Qwen3.6-35B-A3B
|
| 4 |
VLLM_API_KEY=contextforge-local
|
| 5 |
|
| 6 |
+
# APOHARA: Context Forge
|
| 7 |
CONTEXTFORGE_HOST=0.0.0.0
|
| 8 |
CONTEXTFORGE_PORT=8001
|
| 9 |
CONTEXTFORGE_TTL_SECONDS=300
|
Dockerfile
CHANGED
|
@@ -15,4 +15,4 @@ COPY . .
|
|
| 15 |
|
| 16 |
EXPOSE 8001
|
| 17 |
|
| 18 |
-
CMD ["python", "-m", "
|
|
|
|
| 15 |
|
| 16 |
EXPOSE 8001
|
| 17 |
|
| 18 |
+
CMD ["python", "-m", "apohara_context_forge.main"]
|
README.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# APOHARA V1.0 — ContextForge
|
| 2 |
|
| 3 |
```
|
|
@@ -62,46 +66,9 @@ ContextForge coordinates KV block sharing across all agents through 8 peer-revie
|
|
| 62 |
|
| 63 |
Every optimization traces back to a peer-reviewed paper published at **NeurIPS, ICML, ACL, or IJCAI**.
|
| 64 |
|
| 65 |
-
|
| 66 |
-
WITH ContextForge
|
| 67 |
-
|
| 68 |
-
│ AMD Instinct MI300X — 192 GB HBM3 │
|
| 69 |
-
│ ┌────────────────────────────────────────────────────────────────────────┐ │
|
| 70 |
-
│ │ vLLMAtomPlugin (entry_point: vllm.general_plugins) │ │
|
| 71 |
-
│ │ pre/post hooks · KV offset routing · ROCm-native │ │
|
| 72 |
-
│ └────────────────────────────────┬────────────────────────────────────────┘ │
|
| 73 |
-
│ ▼ │
|
| 74 |
-
│ ┌──────────────────────────────────────────────────────────────────────────┐ │
|
| 75 |
-
│ │ VRAMAwareCache + QueueingController (ICML 2026) │ │
|
| 76 |
-
│ │ λ_critical stability · Welford E[S] · INVARIANT-11 │ │
|
| 77 |
-
│ └────────────────────────────┬────────────────────────────────────────────┘ │
|
| 78 |
-
│ ▼ │
|
| 79 |
-
│ ┌──────────────┐ ┌─────────────┐ ┌─────────────┐ ┌───────────────────┐ │
|
| 80 |
-
│ │AnchorPool │ │CLAMetadata │ │StepGraph │ │RotateKV │ │
|
| 81 |
-
│ │KVCOMM │ │CLA/LCKV │ │KVFlow │ │INT4 pre-RoPE │ │
|
| 82 |
-
│ │simhash anchor│ │NAACL 2025 │ │eviction │ │3.97× compression │ │
|
| 83 |
-
│ └──────┬───────┘ └──────┬─────┘ └──────┬─────┘ └─────────┬─────────┘ │
|
| 84 |
-
│ │ │ │ │ │
|
| 85 |
-
│ └─────────────────┴───────────────┴──────────────────┘ │
|
| 86 |
-
│ ▼ │
|
| 87 |
-
│ ┌────────────────────────────────────────────────────────────────────────┐ │
|
| 88 |
-
│ │ ContextRegistry (all modules wired, DI) │ │
|
| 89 |
-
│ │ LSHEngine + FAISSContextIndex · PBKVPredictor · SpeculativeCoordinator │ │
|
| 90 |
-
│ └────────────────────────────────┬────────────────────────────────────────┘ │
|
| 91 |
-
│ ▼ │
|
| 92 |
-
│ ┌───────────────────┐ ┌─────────────────────┐ ┌───────────────────┐ │
|
| 93 |
-
│ │ LMCacheBridge │ │ KVAwareRouter │ │ VisualKVCache │ │
|
| 94 |
-
│ │ cross-worker │ │ anchor locality │ │ SHA256 dedup │ │
|
| 95 |
-
│ │ │ │ CLA affinity │ │ +44.9% throughput │ │
|
| 96 |
-
│ └────────┬──────────┘ └──────────┬───────────┘ └───────────────────┘ │
|
| 97 |
-
│ └──────────────────────────┴──────────────────────────────────────┘ │
|
| 98 |
-
│ │
|
| 99 |
-
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
| 100 |
-
│ │Retriever │ │Reranker │ │Summarizer│ │ Critic │ │Responder │ │
|
| 101 |
-
│ │(fast) │ │(fast) │ │(fast) │ │(CoT) │ │(final) │ │
|
| 102 |
-
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
|
| 103 |
-
└────────────────────────────────────────────────────────────────────────────────┘
|
| 104 |
-
```
|
| 105 |
|
| 106 |
---
|
| 107 |
|
|
@@ -184,7 +151,7 @@ Cost to validate on AMD DevCloud (MI300X x1):
|
|
| 184 |
## 🏗️ Architecture
|
| 185 |
|
| 186 |
```
|
| 187 |
-
|
| 188 |
├── __init__.py
|
| 189 |
├── main.py
|
| 190 |
├── config.py
|
|
@@ -402,8 +369,8 @@ pytest tests/ -v --tb=short
|
|
| 402 |
**AMD DevCloud (MI300X)** — Primary target hardware
|
| 403 |
|
| 404 |
```bash
|
| 405 |
-
git clone https://github.com/SuarezPM/ContextForge
|
| 406 |
-
cd ContextForge
|
| 407 |
pip install -e ".[rocm]"
|
| 408 |
pip install qwen3-embed onnxruntime streamlit prometheus-client --quiet
|
| 409 |
|
|
@@ -434,7 +401,7 @@ streamlit run demo/dashboard.py -- --mock
|
|
| 434 |
**Docker**
|
| 435 |
|
| 436 |
```bash
|
| 437 |
-
docker compose up
|
| 438 |
```
|
| 439 |
|
| 440 |
<!-- PLACEHOLDER:DEVCLOUD_SETUP_VIDEO -->
|
|
|
|
| 1 |
+
<p align="center">
|
| 2 |
+
<img src="assets/apohara-contextforge-logo.png" alt="Apohara : Context Forge" width="420">
|
| 3 |
+
</p>
|
| 4 |
+
|
| 5 |
# APOHARA V1.0 — ContextForge
|
| 6 |
|
| 7 |
```
|
|
|
|
| 66 |
|
| 67 |
Every optimization traces back to a peer-reviewed paper published at **NeurIPS, ICML, ACL, or IJCAI**.
|
| 68 |
|
| 69 |
+
<p align="center">
|
| 70 |
+
<img src="assets/systems-diagram.jpeg" alt="WITH ContextForge — shared KV via ATOM plugin (systems diagram)" width="720">
|
| 71 |
+
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
---
|
| 74 |
|
|
|
|
| 151 |
## 🏗️ Architecture
|
| 152 |
|
| 153 |
```
|
| 154 |
+
apohara_context_forge/
|
| 155 |
├── __init__.py
|
| 156 |
├── main.py
|
| 157 |
├── config.py
|
|
|
|
| 369 |
**AMD DevCloud (MI300X)** — Primary target hardware
|
| 370 |
|
| 371 |
```bash
|
| 372 |
+
git clone https://github.com/SuarezPM/Apohara-ContextForge
|
| 373 |
+
cd Apohara-ContextForge
|
| 374 |
pip install -e ".[rocm]"
|
| 375 |
pip install qwen3-embed onnxruntime streamlit prometheus-client --quiet
|
| 376 |
|
|
|
|
| 401 |
**Docker**
|
| 402 |
|
| 403 |
```bash
|
| 404 |
+
docker compose up apohara
|
| 405 |
```
|
| 406 |
|
| 407 |
<!-- PLACEHOLDER:DEVCLOUD_SETUP_VIDEO -->
|
agents/__pycache__/__init__.cpython-314.pyc
CHANGED
|
Binary files a/agents/__pycache__/__init__.cpython-314.pyc and b/agents/__pycache__/__init__.cpython-314.pyc differ
|
|
|
agents/__pycache__/base_agent.cpython-314.pyc
CHANGED
|
Binary files a/agents/__pycache__/base_agent.cpython-314.pyc and b/agents/__pycache__/base_agent.cpython-314.pyc differ
|
|
|
agents/__pycache__/demo_agents.cpython-314.pyc
CHANGED
|
Binary files a/agents/__pycache__/demo_agents.cpython-314.pyc and b/agents/__pycache__/demo_agents.cpython-314.pyc differ
|
|
|
agents/__pycache__/pipeline.cpython-314.pyc
CHANGED
|
Binary files a/agents/__pycache__/pipeline.cpython-314.pyc and b/agents/__pycache__/pipeline.cpython-314.pyc differ
|
|
|
agents/base_agent.py
CHANGED
|
@@ -6,7 +6,7 @@ import time
|
|
| 6 |
|
| 7 |
import httpx
|
| 8 |
|
| 9 |
-
from
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
|
|
|
| 6 |
|
| 7 |
import httpx
|
| 8 |
|
| 9 |
+
from apohara_context_forge.config import settings
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
agents/pipeline.py
CHANGED
|
@@ -6,12 +6,12 @@ from typing import Any, Optional
|
|
| 6 |
|
| 7 |
from agents.demo_agents import create_agents
|
| 8 |
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
from
|
| 13 |
-
from
|
| 14 |
-
from
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
|
|
| 6 |
|
| 7 |
from agents.demo_agents import create_agents
|
| 8 |
|
| 9 |
+
from apohara_context_forge.dedup.faiss_index import FAISSContextIndex
|
| 10 |
+
from apohara_context_forge.dedup.lsh_engine import LSHTokenMatcher
|
| 11 |
+
from apohara_context_forge.metrics.vram_monitor import VRAMMonitor
|
| 12 |
+
from apohara_context_forge.pipeline_config import PipelineConfig
|
| 13 |
+
from apohara_context_forge.registry.context_registry import ContextRegistry
|
| 14 |
+
from apohara_context_forge.registry.vram_aware_cache import VRAMAwareCache
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
apohara_context_forge.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: apohara-context-forge
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: APOHARA: Context Forge — Silicon-native KV cache coordination for multi-agent LLM pipelines on AMD Instinct MI300X
|
| 5 |
+
License: MIT
|
| 6 |
+
Requires-Python: <3.13,>=3.11
|
| 7 |
+
Requires-Dist: fastapi<0.116,>=0.115
|
| 8 |
+
Requires-Dist: uvicorn[standard]<0.33,>=0.32
|
| 9 |
+
Requires-Dist: pydantic<3,>=2.9
|
| 10 |
+
Requires-Dist: pydantic-settings<3,>=2.6
|
| 11 |
+
Requires-Dist: httpx<0.28,>=0.27
|
| 12 |
+
Requires-Dist: sentence-transformers<4,>=3.3
|
| 13 |
+
Requires-Dist: llmlingua<0.3,>=0.2.2
|
| 14 |
+
Requires-Dist: torch<2.6,>=2.4
|
| 15 |
+
Requires-Dist: gradio<6,>=5.7
|
| 16 |
+
Requires-Dist: plotly<6,>=5.24
|
| 17 |
+
Requires-Dist: numpy<2.2,>=1.26
|
| 18 |
+
Requires-Dist: aiofiles<25,>=24.1
|
| 19 |
+
Requires-Dist: rich<14,>=13.9
|
| 20 |
+
Requires-Dist: psutil<8,>=5.9
|
| 21 |
+
Provides-Extra: dev
|
| 22 |
+
Requires-Dist: pytest>=8.3; extra == "dev"
|
| 23 |
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
| 24 |
+
Requires-Dist: ruff>=0.7; extra == "dev"
|
| 25 |
+
Requires-Dist: fastapi; extra == "dev"
|
| 26 |
+
Requires-Dist: httpx; extra == "dev"
|
| 27 |
+
Requires-Dist: gradio; extra == "dev"
|
| 28 |
+
Requires-Dist: streamlit; extra == "dev"
|
| 29 |
+
Requires-Dist: anyio; extra == "dev"
|
| 30 |
+
Requires-Dist: pytest-anyio; extra == "dev"
|
apohara_context_forge.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
agents/__init__.py
|
| 4 |
+
agents/base_agent.py
|
| 5 |
+
agents/demo_agents.py
|
| 6 |
+
agents/pipeline.py
|
| 7 |
+
apohara_context_forge/__init__.py
|
| 8 |
+
apohara_context_forge/config.py
|
| 9 |
+
apohara_context_forge/main.py
|
| 10 |
+
apohara_context_forge/models.py
|
| 11 |
+
apohara_context_forge/pipeline_config.py
|
| 12 |
+
apohara_context_forge/token_counter.py
|
| 13 |
+
apohara_context_forge.egg-info/PKG-INFO
|
| 14 |
+
apohara_context_forge.egg-info/SOURCES.txt
|
| 15 |
+
apohara_context_forge.egg-info/dependency_links.txt
|
| 16 |
+
apohara_context_forge.egg-info/entry_points.txt
|
| 17 |
+
apohara_context_forge.egg-info/requires.txt
|
| 18 |
+
apohara_context_forge.egg-info/top_level.txt
|
| 19 |
+
apohara_context_forge/compression/__init__.py
|
| 20 |
+
apohara_context_forge/compression/budget_manager.py
|
| 21 |
+
apohara_context_forge/compression/compressor.py
|
| 22 |
+
apohara_context_forge/compression/coordinator.py
|
| 23 |
+
apohara_context_forge/decoding/__init__.py
|
| 24 |
+
apohara_context_forge/decoding/speculative_coordinator.py
|
| 25 |
+
apohara_context_forge/dedup/__init__.py
|
| 26 |
+
apohara_context_forge/dedup/_deprecated_dedup_engine.py
|
| 27 |
+
apohara_context_forge/dedup/cosine.py
|
| 28 |
+
apohara_context_forge/dedup/embedder.py
|
| 29 |
+
apohara_context_forge/dedup/faiss_index.py
|
| 30 |
+
apohara_context_forge/dedup/lsh_engine.py
|
| 31 |
+
apohara_context_forge/embeddings/__init__.py
|
| 32 |
+
apohara_context_forge/embeddings/embedding_engine.py
|
| 33 |
+
apohara_context_forge/kv_offset/__init__.py
|
| 34 |
+
apohara_context_forge/kv_offset/anchor_pool.py
|
| 35 |
+
apohara_context_forge/kv_offset/cla_metadata.py
|
| 36 |
+
apohara_context_forge/mcp/__init__.py
|
| 37 |
+
apohara_context_forge/mcp/server.py
|
| 38 |
+
apohara_context_forge/metrics/__init__.py
|
| 39 |
+
apohara_context_forge/metrics/collector.py
|
| 40 |
+
apohara_context_forge/metrics/prometheus_metrics.py
|
| 41 |
+
apohara_context_forge/metrics/vram_monitor.py
|
| 42 |
+
apohara_context_forge/multimodal/__init__.py
|
| 43 |
+
apohara_context_forge/multimodal/visual_kv_cache.py
|
| 44 |
+
apohara_context_forge/normalization/__init__.py
|
| 45 |
+
apohara_context_forge/normalization/prefix_normalizer.py
|
| 46 |
+
apohara_context_forge/quantization/rotate_kv.py
|
| 47 |
+
apohara_context_forge/registry/__init__.py
|
| 48 |
+
apohara_context_forge/registry/_deprecated_ttl_cache.py
|
| 49 |
+
apohara_context_forge/registry/context_registry.py
|
| 50 |
+
apohara_context_forge/registry/vram_aware_cache.py
|
| 51 |
+
apohara_context_forge/routing/kv_aware_router.py
|
| 52 |
+
apohara_context_forge/scheduling/pbkv_predictor.py
|
| 53 |
+
apohara_context_forge/scheduling/queueing_controller.py
|
| 54 |
+
apohara_context_forge/scheduling/step_graph.py
|
| 55 |
+
apohara_context_forge/serving/__init__.py
|
| 56 |
+
apohara_context_forge/serving/atom_plugin.py
|
| 57 |
+
apohara_context_forge/serving/lmcache_bridge.py
|
| 58 |
+
apohara_context_forge/serving/vllm_client.py
|
| 59 |
+
demo/__init__.py
|
| 60 |
+
demo/app.py
|
| 61 |
+
demo/benchmark.py
|
| 62 |
+
demo/benchmark_v4.py
|
| 63 |
+
demo/benchmark_v5.py
|
| 64 |
+
demo/dashboard.py
|
| 65 |
+
tests/test_atom_plugin.py
|
| 66 |
+
tests/test_benchmark.py
|
| 67 |
+
tests/test_cla_metadata.py
|
| 68 |
+
tests/test_compressor.py
|
| 69 |
+
tests/test_coordinator.py
|
| 70 |
+
tests/test_dedup.py
|
| 71 |
+
tests/test_embedding_engine.py
|
| 72 |
+
tests/test_integration.py
|
| 73 |
+
tests/test_kv_aware_router.py
|
| 74 |
+
tests/test_kv_offset.py
|
| 75 |
+
tests/test_lmcache_bridge.py
|
| 76 |
+
tests/test_mcp_server.py
|
| 77 |
+
tests/test_normalization.py
|
| 78 |
+
tests/test_pbkv_predictor.py
|
| 79 |
+
tests/test_pipeline.py
|
| 80 |
+
tests/test_queueing_controller.py
|
| 81 |
+
tests/test_registry.py
|
| 82 |
+
tests/test_rotate_kv.py
|
| 83 |
+
tests/test_speculative_coordinator.py
|
| 84 |
+
tests/test_step_graph.py
|
| 85 |
+
tests/test_visual_kv_cache.py
|
apohara_context_forge.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
apohara_context_forge.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
apohara = apohara_context_forge.main:main
|
apohara_context_forge.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi<0.116,>=0.115
|
| 2 |
+
uvicorn[standard]<0.33,>=0.32
|
| 3 |
+
pydantic<3,>=2.9
|
| 4 |
+
pydantic-settings<3,>=2.6
|
| 5 |
+
httpx<0.28,>=0.27
|
| 6 |
+
sentence-transformers<4,>=3.3
|
| 7 |
+
llmlingua<0.3,>=0.2.2
|
| 8 |
+
torch<2.6,>=2.4
|
| 9 |
+
gradio<6,>=5.7
|
| 10 |
+
plotly<6,>=5.24
|
| 11 |
+
numpy<2.2,>=1.26
|
| 12 |
+
aiofiles<25,>=24.1
|
| 13 |
+
rich<14,>=13.9
|
| 14 |
+
psutil<8,>=5.9
|
| 15 |
+
|
| 16 |
+
[dev]
|
| 17 |
+
pytest>=8.3
|
| 18 |
+
pytest-asyncio>=0.24
|
| 19 |
+
ruff>=0.7
|
| 20 |
+
fastapi
|
| 21 |
+
httpx
|
| 22 |
+
gradio
|
| 23 |
+
streamlit
|
| 24 |
+
anyio
|
| 25 |
+
pytest-anyio
|
apohara_context_forge.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
agents
|
| 2 |
+
apohara_context_forge
|
| 3 |
+
demo
|
{contextforge → apohara_context_forge}/__init__.py
RENAMED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
"""ContextForge - Shared context compiler for multi-agent LLM systems on AMD MI300X."""
|
| 2 |
__version__ = "3.0.0"
|
| 3 |
|
| 4 |
-
from
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
|
| 12 |
__all__ = [
|
| 13 |
# Core registry
|
|
|
|
| 1 |
"""ContextForge - Shared context compiler for multi-agent LLM systems on AMD MI300X."""
|
| 2 |
__version__ = "3.0.0"
|
| 3 |
|
| 4 |
+
from apohara_context_forge.registry.context_registry import ContextRegistry, SharedContextResult, RegisteredAgent
|
| 5 |
+
from apohara_context_forge.pipeline_config import PipelineConfig
|
| 6 |
+
from apohara_context_forge.token_counter import TokenCounter, count_tokens, encode_tokens, compute_kv_gb
|
| 7 |
+
from apohara_context_forge.metrics.vram_monitor import VRAMMonitor, get_monitor, get_vram_pressure
|
| 8 |
+
from apohara_context_forge.dedup.lsh_engine import LSHTokenMatcher, TokenBlockMatch
|
| 9 |
+
from apohara_context_forge.dedup.faiss_index import FAISSContextIndex, FAISSMatch
|
| 10 |
+
from apohara_context_forge.registry.vram_aware_cache import VRAMAwareCache, EvictionMode
|
| 11 |
|
| 12 |
__all__ = [
|
| 13 |
# Core registry
|
apohara_context_forge/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.23 kB). View file
|
|
|
apohara_context_forge/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (1.22 kB). View file
|
|
|
{contextforge → apohara_context_forge}/__pycache__/config.cpython-314.pyc
RENAMED
|
Binary files a/contextforge/__pycache__/config.cpython-314.pyc and b/apohara_context_forge/__pycache__/config.cpython-314.pyc differ
|
|
|
apohara_context_forge/__pycache__/main.cpython-314.pyc
ADDED
|
Binary file (2.22 kB). View file
|
|
|
apohara_context_forge/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (3.24 kB). View file
|
|
|
apohara_context_forge/__pycache__/models.cpython-314.pyc
ADDED
|
Binary file (4.91 kB). View file
|
|
|
apohara_context_forge/__pycache__/pipeline_config.cpython-312.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
apohara_context_forge/__pycache__/pipeline_config.cpython-314.pyc
ADDED
|
Binary file (2.78 kB). View file
|
|
|
apohara_context_forge/__pycache__/token_counter.cpython-312.pyc
ADDED
|
Binary file (8.45 kB). View file
|
|
|
apohara_context_forge/__pycache__/token_counter.cpython-314.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
{contextforge → apohara_context_forge}/compression/__init__.py
RENAMED
|
File without changes
|
apohara_context_forge/compression/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (252 Bytes). View file
|
|
|
apohara_context_forge/compression/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (254 Bytes). View file
|
|
|
apohara_context_forge/compression/__pycache__/budget_manager.cpython-312.pyc
ADDED
|
Binary file (12 kB). View file
|
|
|
apohara_context_forge/compression/__pycache__/budget_manager.cpython-314.pyc
ADDED
|
Binary file (13.5 kB). View file
|
|
|
apohara_context_forge/compression/__pycache__/compressor.cpython-312.pyc
ADDED
|
Binary file (3.82 kB). View file
|
|
|
apohara_context_forge/compression/__pycache__/compressor.cpython-314.pyc
ADDED
|
Binary file (4.65 kB). View file
|
|
|
apohara_context_forge/compression/__pycache__/coordinator.cpython-314.pyc
ADDED
|
Binary file (4.26 kB). View file
|
|
|
{contextforge → apohara_context_forge}/compression/budget_manager.py
RENAMED
|
@@ -171,7 +171,7 @@ class CompressionBudgetManager:
|
|
| 171 |
Returns:
|
| 172 |
CompressionPlan with decision and parameters
|
| 173 |
"""
|
| 174 |
-
from
|
| 175 |
|
| 176 |
if token_count is None:
|
| 177 |
token_count = TokenCounter.get().count(segment)
|
|
@@ -238,7 +238,7 @@ class CompressionBudgetManager:
|
|
| 238 |
if not plan.should_compress:
|
| 239 |
return plan.segment, 1.0
|
| 240 |
|
| 241 |
-
from
|
| 242 |
|
| 243 |
compressor = ContextCompressor()
|
| 244 |
await compressor.load()
|
|
@@ -288,15 +288,17 @@ def detect_segment_type(segment: str) -> SegmentType:
|
|
| 288 |
if indicator.lower() in segment.lower()[:100]:
|
| 289 |
return SegmentType.TOOL_RESULT
|
| 290 |
|
| 291 |
-
# Check for agent
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
if any(ind in segment.lower()[:150] for ind in agent_indicators):
|
| 294 |
return SegmentType.AGENT_OUTPUT
|
| 295 |
|
| 296 |
-
# Check for CoT reasoning
|
| 297 |
-
if all(ind in segment.lower() for ind in ["step", "reasoning"]) or "step by step" in segment.lower():
|
| 298 |
-
return SegmentType.COT_REASONING
|
| 299 |
-
|
| 300 |
# Check for RAG/retrieved content
|
| 301 |
rag_indicators = ["document", "retrieved", "context:", "reference:"]
|
| 302 |
if any(ind in segment.lower()[:200] for ind in rag_indicators):
|
|
|
|
| 171 |
Returns:
|
| 172 |
CompressionPlan with decision and parameters
|
| 173 |
"""
|
| 174 |
+
from apohara_context_forge.token_counter import TokenCounter
|
| 175 |
|
| 176 |
if token_count is None:
|
| 177 |
token_count = TokenCounter.get().count(segment)
|
|
|
|
| 238 |
if not plan.should_compress:
|
| 239 |
return plan.segment, 1.0
|
| 240 |
|
| 241 |
+
from apohara_context_forge.compression.compressor import ContextCompressor
|
| 242 |
|
| 243 |
compressor = ContextCompressor()
|
| 244 |
await compressor.load()
|
|
|
|
| 288 |
if indicator.lower() in segment.lower()[:100]:
|
| 289 |
return SegmentType.TOOL_RESULT
|
| 290 |
|
| 291 |
+
# Check for CoT reasoning FIRST (before agent — "step" + "reasoning" without ":")
|
| 292 |
+
if "step by step" in segment.lower() or (
|
| 293 |
+
"step" in segment.lower() and "reasoning" in segment.lower()
|
| 294 |
+
):
|
| 295 |
+
return SegmentType.COT_REASONING
|
| 296 |
+
|
| 297 |
+
# Check for agent output indicators (after CoT)
|
| 298 |
+
agent_indicators = ["summarized", "analyzed", "reasoning:", "step"]
|
| 299 |
if any(ind in segment.lower()[:150] for ind in agent_indicators):
|
| 300 |
return SegmentType.AGENT_OUTPUT
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
# Check for RAG/retrieved content
|
| 303 |
rag_indicators = ["document", "retrieved", "context:", "reference:"]
|
| 304 |
if any(ind in segment.lower()[:200] for ind in rag_indicators):
|
{contextforge → apohara_context_forge}/compression/compressor.py
RENAMED
|
@@ -3,7 +3,7 @@ import asyncio
|
|
| 3 |
import logging
|
| 4 |
from typing import Literal
|
| 5 |
|
| 6 |
-
from llmlingua import
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
@@ -13,7 +13,7 @@ class ContextCompressor:
|
|
| 13 |
|
| 14 |
def __init__(self, model_name: str = "microsoft/llmlingua-2-xlm-roberta-large-meetingbank"):
|
| 15 |
self._model_name = model_name
|
| 16 |
-
self._model:
|
| 17 |
self._lock = asyncio.Lock()
|
| 18 |
|
| 19 |
async def load(self) -> None:
|
|
@@ -22,7 +22,7 @@ class ContextCompressor:
|
|
| 22 |
async with self._lock:
|
| 23 |
if self._model is None:
|
| 24 |
logger.info(f"Loading compressor: {self._model_name}")
|
| 25 |
-
self._model =
|
| 26 |
|
| 27 |
async def compress(self, context: str, rate: float = 0.5) -> tuple[str, float]:
|
| 28 |
"""
|
|
|
|
| 3 |
import logging
|
| 4 |
from typing import Literal
|
| 5 |
|
| 6 |
+
from llmlingua import PromptCompressor
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
|
|
|
| 13 |
|
| 14 |
def __init__(self, model_name: str = "microsoft/llmlingua-2-xlm-roberta-large-meetingbank"):
|
| 15 |
self._model_name = model_name
|
| 16 |
+
self._model: PromptCompressor | None = None
|
| 17 |
self._lock = asyncio.Lock()
|
| 18 |
|
| 19 |
async def load(self) -> None:
|
|
|
|
| 22 |
async with self._lock:
|
| 23 |
if self._model is None:
|
| 24 |
logger.info(f"Loading compressor: {self._model_name}")
|
| 25 |
+
self._model = PromptCompressor(self._model_name)
|
| 26 |
|
| 27 |
async def compress(self, context: str, rate: float = 0.5) -> tuple[str, float]:
|
| 28 |
"""
|
{contextforge → apohara_context_forge}/compression/coordinator.py
RENAMED
|
@@ -3,9 +3,9 @@ import asyncio
|
|
| 3 |
import logging
|
| 4 |
from typing import Literal
|
| 5 |
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
@@ -27,7 +27,7 @@ class CompressionCoordinator:
|
|
| 27 |
|
| 28 |
async def decide(self, agent_id: str, context: str) -> CompressionDecision:
|
| 29 |
"""Make compression decision for an agent's context."""
|
| 30 |
-
from
|
| 31 |
|
| 32 |
registry = ContextRegistry()
|
| 33 |
original_tokens = len(context.split())
|
|
@@ -60,7 +60,7 @@ class CompressionCoordinator:
|
|
| 60 |
)
|
| 61 |
elif similarity < 0.85 and original_tokens > 500:
|
| 62 |
# Compress only
|
| 63 |
-
from
|
| 64 |
compressor = ContextCompressor()
|
| 65 |
compressed, ratio = await compressor.compress(context, settings.contextforge_compression_rate)
|
| 66 |
final_tokens = len(compressed.split())
|
|
@@ -73,7 +73,7 @@ class CompressionCoordinator:
|
|
| 73 |
)
|
| 74 |
elif similarity >= 0.85 and original_tokens > 500:
|
| 75 |
# Both reuse and compress
|
| 76 |
-
from
|
| 77 |
compressor = ContextCompressor()
|
| 78 |
compressed, ratio = await compressor.compress(context, settings.contextforge_compression_rate)
|
| 79 |
final_tokens = len(compressed.split())
|
|
|
|
| 3 |
import logging
|
| 4 |
from typing import Literal
|
| 5 |
|
| 6 |
+
from apohara_context_forge.config import settings
|
| 7 |
+
from apohara_context_forge.dedup.dedup_engine import SemanticDedupEngine
|
| 8 |
+
from apohara_context_forge.models import CompressionDecision
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
|
|
| 27 |
|
| 28 |
async def decide(self, agent_id: str, context: str) -> CompressionDecision:
|
| 29 |
"""Make compression decision for an agent's context."""
|
| 30 |
+
from apohara_context_forge.registry.context_registry import ContextRegistry
|
| 31 |
|
| 32 |
registry = ContextRegistry()
|
| 33 |
original_tokens = len(context.split())
|
|
|
|
| 60 |
)
|
| 61 |
elif similarity < 0.85 and original_tokens > 500:
|
| 62 |
# Compress only
|
| 63 |
+
from apohara_context_forge.compression.compressor import ContextCompressor
|
| 64 |
compressor = ContextCompressor()
|
| 65 |
compressed, ratio = await compressor.compress(context, settings.contextforge_compression_rate)
|
| 66 |
final_tokens = len(compressed.split())
|
|
|
|
| 73 |
)
|
| 74 |
elif similarity >= 0.85 and original_tokens > 500:
|
| 75 |
# Both reuse and compress
|
| 76 |
+
from apohara_context_forge.compression.compressor import ContextCompressor
|
| 77 |
compressor = ContextCompressor()
|
| 78 |
compressed, ratio = await compressor.compress(context, settings.contextforge_compression_rate)
|
| 79 |
final_tokens = len(compressed.split())
|
{contextforge → apohara_context_forge}/config.py
RENAMED
|
File without changes
|
{contextforge → apohara_context_forge}/decoding/__init__.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""Decoding package — speculative decoding coordinators."""
|
| 2 |
|
| 3 |
-
from
|
| 4 |
SpeculativeConfig,
|
| 5 |
SpeculativeCoordinator,
|
| 6 |
SpeculativeResult,
|
|
|
|
| 1 |
"""Decoding package — speculative decoding coordinators."""
|
| 2 |
|
| 3 |
+
from apohara_context_forge.decoding.speculative_coordinator import (
|
| 4 |
SpeculativeConfig,
|
| 5 |
SpeculativeCoordinator,
|
| 6 |
SpeculativeResult,
|
apohara_context_forge/decoding/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (438 Bytes). View file
|
|
|
apohara_context_forge/decoding/__pycache__/speculative_coordinator.cpython-314.pyc
ADDED
|
Binary file (13.6 kB). View file
|
|
|
{contextforge → apohara_context_forge}/decoding/speculative_coordinator.py
RENAMED
|
@@ -29,7 +29,7 @@ from typing import Optional, TYPE_CHECKING
|
|
| 29 |
logger = logging.getLogger(__name__)
|
| 30 |
|
| 31 |
if TYPE_CHECKING:
|
| 32 |
-
from
|
| 33 |
|
| 34 |
|
| 35 |
@dataclass
|
|
|
|
| 29 |
logger = logging.getLogger(__name__)
|
| 30 |
|
| 31 |
if TYPE_CHECKING:
|
| 32 |
+
from apohara_context_forge.scheduling.queueing_controller import QueueingController
|
| 33 |
|
| 34 |
|
| 35 |
@dataclass
|
{contextforge → apohara_context_forge}/dedup/__init__.py
RENAMED
|
File without changes
|
apohara_context_forge/dedup/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (216 Bytes). View file
|
|
|
apohara_context_forge/dedup/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (218 Bytes). View file
|
|
|
apohara_context_forge/dedup/__pycache__/_deprecated_dedup_engine.cpython-314.pyc
ADDED
|
Binary file (5.96 kB). View file
|
|
|
apohara_context_forge/dedup/__pycache__/embedder.cpython-314.pyc
ADDED
|
Binary file (3.87 kB). View file
|
|
|
apohara_context_forge/dedup/__pycache__/faiss_index.cpython-312.pyc
ADDED
|
Binary file (12.6 kB). View file
|
|
|
apohara_context_forge/dedup/__pycache__/faiss_index.cpython-314.pyc
ADDED
|
Binary file (14.4 kB). View file
|
|
|
apohara_context_forge/dedup/__pycache__/lsh_engine.cpython-312.pyc
ADDED
|
Binary file (11.6 kB). View file
|
|
|