Spaces:

prithic07
/

context-prune

Sleeping

App Files Community

prithic07 committed 8 days ago

Commit

582387d

1 Parent(s): 3838887

Deploy: Synchronize ports to 7860 and add Hugging Face Space metadata

Browse files

Files changed (26) hide show

Dockerfile +2 -2
README.md +1 -2
app.py +1 -1
context_pruning_env/env.py +0 -152
context_pruning_env/graders.py +0 -56
context_pruning_env/models.py +0 -54
context_pruning_env/utils.py +0 -80
openenv.yaml +1 -1
rag_gc_env/__init__.py +0 -11
rag_gc_env/__pycache__/__init__.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/environment.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/grader.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/inference.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/models.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/rewards.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/tasks.cpython-311.pyc +0 -0
rag_gc_env/environment.py +0 -187
rag_gc_env/grader.py +0 -43
rag_gc_env/inference.py +0 -49
rag_gc_env/models.py +0 -53
rag_gc_env/rewards.py +0 -89
rag_gc_env/server/__init__.py +0 -1
rag_gc_env/server/__pycache__/__init__.cpython-311.pyc +0 -0
rag_gc_env/server/__pycache__/app.cpython-311.pyc +0 -0
rag_gc_env/server/app.py +0 -23
rag_gc_env/tasks.py +0 -144

Dockerfile CHANGED Viewed

@@ -12,6 +12,6 @@ RUN pip install --no-cache-dir --upgrade pip && \
 COPY . /app
-EXPOSE 8000
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

 COPY . /app
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -75,7 +75,7 @@ ContextPrune includes three canonical tasks that simulate high-pressure operatio
 ## 5. Technical Components
 - **`rag_optimizer_env/`**: Core state management, hybrid retrieval (Keyword + Semantic), and token estimation using `llm_runtime`.
-- **`app.py`**: A standard FastAPI implementation.
 - **`inference.py`**: A baseline agent script demonstrating how to use the OpenAI-compatible interface.
 - **`validate.py`**: A robust validation suite that runs a full episode lifecycle locally to ensure 100% environment compliance.
@@ -88,4 +88,3 @@ ContextPrune includes three canonical tasks that simulate high-pressure operatio
 3. **Control Panel**: `streamlit run optimizer_ui.py`
 4. **Validation**: `python validate.py`
-Built for Context Optimization Research.

 ## 5. Technical Components
 - **`rag_optimizer_env/`**: Core state management, hybrid retrieval (Keyword + Semantic), and token estimation using `llm_runtime`.
+- **`app.py`**: A standard FastAPI implementation. Built for Context Optimization Research.
 - **`inference.py`**: A baseline agent script demonstrating how to use the OpenAI-compatible interface.
 - **`validate.py`**: A robust validation suite that runs a full episode lifecycle locally to ensure 100% environment compliance.
 3. **Control Panel**: `streamlit run optimizer_ui.py`
 4. **Validation**: `python validate.py`

app.py CHANGED Viewed

@@ -387,4 +387,4 @@ async def optimize_prompt_endpoint(payload: OptimizePromptRequest):
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)

 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)

context_pruning_env/env.py DELETED Viewed

@@ -1,152 +0,0 @@
-from __future__ import annotations
-from typing import Any, Optional, List, Dict
-from uuid import uuid4
-from openenv.core.env_server.interfaces import Environment
-from context_pruning_env.models import (
-    ContextAction,
-    ContextObservation,
-    ContextReward,
-    PruningState,
-    ChunkItem
-)
-from context_pruning_env.utils import SQuADLoader, count_tokens
-from context_pruning_env.graders import (
-    grade_noise_purge,
-    grade_dedupe_arena,
-    grade_signal_extract
-)
-class ContextPruningEnv(Environment[ContextAction, ContextObservation, PruningState]):
-    """
-    Hackathon-compliant Context Pruning Environment.
-    """
-    def __init__(self, squad_split: str = "train"):
-        super().__init__(transform=None, rubric=None)
-        self.loader = SQuADLoader(split=squad_split)
-        self._state = None
-    def reset(
-        self,
-        seed: Optional[int] = None,
-        episode_id: Optional[str] = None,
-        task_name: Optional[str] = "noise_purge",
-        **kwargs: Any,
-    ) -> ContextObservation:
-        """
-        Starts a new episode with the specified task.
-        """
-        task_name = task_name or "noise_purge"
-        question, chunks_data = self.loader.get_episode(task_name)
-        chunks = []
-        total_tokens = 0
-        for item in chunks_data:
-            tokens = count_tokens(item["content"])
-            total_tokens += tokens
-            chunks.append(ChunkItem(
-                content=item["content"],
-                is_gold=item["is_gold"],
-                is_duplicate=item["is_duplicate"],
-                tokens=tokens
-            ))
-        self._state = PruningState(
-            episode_id=episode_id or str(uuid4()),
-            task_name=task_name,
-            question=question,
-            chunks=chunks,
-            initial_tokens=total_tokens,
-            step_count=0,
-            done=False
-        )
-        return self._observe(message=f"Task '{task_name}' initialized.")
-    def _observe(self, message: str = "") -> ContextObservation:
-        """Create observation from state."""
-        return ContextObservation(
-            done=self._state.done,
-            question=self._state.question,
-            chunks=[c.content for c in self._state.chunks],
-            initial_token_count=self._state.initial_tokens,
-            current_token_count=sum(c.tokens for c in self._state.chunks),
-            task_name=self._state.task_name,
-            message=message
-        )
-    def step(
-        self,
-        action: ContextAction,
-        **kwargs: Any,
-    ) -> ContextObservation:
-        """
-        Takes a binary mask and calculates rewards based on trajectory signals.
-        """
-        if self._state.done:
-            return self._observe(message="Episode is already done.")
-        mask = action.mask
-        if len(mask) != len(self._state.chunks):
-            # Pad with 0 (Prune) instead of 1 (Keep) to ensure agent optimization
-            mask = (mask + [0] * len(self._state.chunks))[:len(self._state.chunks)]
-        # Trajectory Simulation Logic
-        total_reward = 0.0
-        efficiency_reward = 0.0
-        accuracy_reward = 0.0
-        gold_penalty = 0.0
-        success = True
-        for i, kept in enumerate(mask):
-            chunk = self._state.chunks[i]
-            if not kept: # Pruned
-                if chunk.is_gold:
-                    # Critical Failure
-                    gold_penalty = -1.0
-                    success = False
-                    break # Immediate stop
-                else:
-                    # Correctly pruned noise/duplicate
-                    efficiency_reward += 0.1
-            else: # Kept
-                pass
-        # Final Accuracy Bonus
-        if success:
-            accuracy_reward = 0.7
-        total_reward = efficiency_reward + accuracy_reward + gold_penalty
-        # Task Score (Normalized 0.0 to 1.0 for the evaluator)
-        if self._state.task_name == "noise_purge":
-            score_obj = grade_noise_purge(mask, self._state.chunks)
-        elif self._state.task_name == "dedupe_arena":
-            score_obj = grade_dedupe_arena(mask, self._state.chunks)
-        elif self._state.task_name == "signal_extract":
-            score_obj = grade_signal_extract(mask, self._state.chunks)
-        else:
-            score_obj = grade_noise_purge(mask, self._state.chunks)
-        self._state.done = True
-        self._state.step_count += 1
-        obs = self._observe(message=score_obj.message)
-        obs.reward = total_reward # Trajectory reward
-        if not obs.metadata:
-            obs.metadata = {}
-        obs.metadata["eval_score"] = score_obj.score # Grader score
-        obs.metadata["reward_detail"] = {
-            "efficiency": efficiency_reward,
-            "accuracy": accuracy_reward,
-            "penalty": gold_penalty
-        }
-        return obs
-    @property
-    def state(self) -> PruningState:
-        """Official state access as required by openenv-core."""
-        return self._state

context_pruning_env/graders.py DELETED Viewed

@@ -1,56 +0,0 @@
-from typing import List
-from context_pruning_env.models import ChunkItem, ContextReward
-def grade_noise_purge(mask: List[int], chunks: List[ChunkItem]) -> ContextReward:
-    """
-    Easy Task: Score 1.0 if gold kept AND noise pruned.
-    """
-    gold_kept = any(mask[i] == 1 and chunks[i].is_gold for i in range(len(mask)))
-    noise_pruned = all(mask[i] == 0 for i in range(len(mask)) if not chunks[i].is_gold)
-    if not gold_kept:
-        return ContextReward(score=0.0, gold_penalty=-1.0, message="Critical: Gold chunk lost.")
-    if noise_pruned:
-        return ContextReward(score=1.0, message="Perfect: All noise purged.")
-    else:
-        return ContextReward(score=0.5, message="Partial: Gold kept but noise remains.")
-def grade_dedupe_arena(mask: List[int], chunks: List[ChunkItem]) -> ContextReward:
-    """
-    Medium Task: 1.0 if word count reduced > 50% AND gold kept.
-    """
-    initial_words = sum(len(c.content.split()) for c in chunks)
-    final_words = sum(len(chunks[i].content.split()) for i, kept in enumerate(mask) if kept)
-    gold_kept = any(mask[i] == 1 and chunks[i].is_gold for i in range(len(mask)))
-    reduction = 1.0 - (final_words / initial_words) if initial_words > 0 else 1.0
-    if not gold_kept:
-        return ContextReward(score=0.0, message="Critical: Answer lost during deduplication.")
-    if reduction >= 0.5:
-        return ContextReward(score=1.0, message=f"Great: {reduction:.1%} word reduction achieved.")
-    else:
-        return ContextReward(score=0.5, message=f"Partial: Only {reduction:.1%} reduction.")
-def grade_signal_extract(mask: List[int], chunks: List[ChunkItem]) -> ContextReward:
-    """
-    Hard Task: 1 - (FinalTokens/InitialTokens) if gold kept.
-    """
-    initial_tokens = sum(c.tokens for c in chunks)
-    final_tokens = sum(chunks[i].tokens for i, kept in enumerate(mask) if kept)
-    gold_kept = any(mask[i] == 1 and chunks[i].is_gold for i in range(len(mask)))
-    if not gold_kept:
-        return ContextReward(score=0.0, message="Critical: Signal lost in noise.")
-    reduction_score = 1.0 - (final_tokens / initial_tokens) if initial_tokens > 0 else 0.0
-    # Ensure score is at least positive if gold is kept
-    final_score = max(0.1, reduction_score)
-    return ContextReward(
-        score=final_score,
-        message=f"Signal Extracted: {reduction_score:.1%} compression."
-    )

context_pruning_env/models.py DELETED Viewed

@@ -1,54 +0,0 @@
-from __future__ import annotations
-from typing import List, Optional, Any, Dict
-from pydantic import BaseModel, Field
-from openenv.core.env_server.types import Action, Observation, State
-class ContextAction(Action):
-    """
-    Action space: A binary mask of N values (1 = keep, 0 = prune).
-    """
-    mask: List[int] = Field(
-        ...,
-        min_length=1,
-        description="Binary mask of integers (0 or 1) indicating which chunks to keep."
-    )
-class ContextObservation(Observation):
-    """
-    Observation provided to the agent.
-    """
-    question: str
-    chunks: List[str] = Field(default_factory=list, description="Current context chunks.")
-    initial_token_count: int = 0
-    current_token_count: int = 0
-    task_name: str = ""
-    message: str = ""
-class ContextReward(BaseModel):
-    """
-    Detailed reward breakdown for Meta x Scaler audit.
-    """
-    score: float = Field(0.0, ge=0.0, le=1.0, description="Overall task score (0 to 1).")
-    efficiency_reward: float = 0.0
-    accuracy_reward: float = 0.0
-    gold_penalty: float = 0.0
-    message: str = ""
-class ChunkItem(BaseModel):
-    """Internal representation of a context chunk."""
-    content: str
-    is_gold: bool = False
-    tokens: int = 0
-    is_duplicate: bool = False
-class PruningState(State):
-    """
-    Internal state for ContextPrune.
-    """
-    task_name: str
-    question: str
-    chunks: List[ChunkItem]
-    initial_tokens: int
-    step_count: int = 0
-    done: bool = False
-    metadata: Dict[str, Any] = Field(default_factory=dict)

context_pruning_env/utils.py DELETED Viewed

@@ -1,80 +0,0 @@
-import random
-import re
-from typing import List, Tuple, Dict, Any
-from datasets import load_dataset
-import logging
-logger = logging.getLogger(__name__)
-class SQuADLoader:
-    def __init__(self, split: str = "train"):
-        try:
-            self.dataset = load_dataset("squad", split=split)
-        except Exception as e:
-            logger.error(f"Failed to load SQuAD: {e}")
-            self.dataset = []
-        self.indices = list(range(len(self.dataset)))
-        random.shuffle(self.indices)
-        self.current_ptr = 0
-    def _get_next_entry(self):
-        if self.current_ptr >= len(self.indices):
-            random.shuffle(self.indices)
-            self.current_ptr = 0
-        idx = self.indices[self.current_ptr]
-        self.current_ptr += 1
-        return idx, self.dataset[idx]
-    def get_episode(self, task_name: str) -> Tuple[str, List[Dict[str, Any]]]:
-        """
-        Returns (question, List[Dict(content, is_gold, is_duplicate)])
-        """
-        idx, entry = self._get_next_entry()
-        question = entry["question"]
-        gold_context = entry["context"]
-        chunks = []
-        if task_name == "noise_purge":
-            # Easy: 1 Gold + 1 Irrelevant
-            chunks.append({"content": gold_context, "is_gold": True, "is_duplicate": False})
-            _, noise_entry = self._get_next_entry()
-            chunks.append({"content": noise_entry["context"], "is_gold": False, "is_duplicate": False})
-        elif task_name == "dedupe_arena":
-            # Medium: 1 Gold + 2 Near-Duplicates (Simulated by repeating gold)
-            chunks.append({"content": gold_context, "is_gold": True, "is_duplicate": False})
-            # Duplicate 1: slightly modified or identical
-            chunks.append({"content": gold_context + " ", "is_gold": True, "is_duplicate": True})
-            # Duplicate 2: slightly modified
-            chunks.append({"content": "Actually, " + gold_context, "is_gold": True, "is_duplicate": True})
-        elif task_name == "signal_extract":
-            # Hard: 1 Gold context + multiple noise (2,000+ words total)
-            long_context_parts = [gold_context]
-            current_words = len(gold_context.split())
-            while current_words < 2200: # Ensure 2,000+ words
-                _, noise_entry = self._get_next_entry()
-                content = noise_entry["context"]
-                long_context_parts.append(content)
-                current_words += len(content.split())
-            # Shuffling the parts so the gold one isn't first
-            random.shuffle(long_context_parts)
-            for part in long_context_parts:
-                is_gold = (part == gold_context)
-                chunks.append({"content": part, "is_gold": is_gold, "is_duplicate": False})
-        else:
-            # Default to noise_purge
-            return self.get_episode("noise_purge")
-        # Shuffle chunks for non-signal tasks
-        if task_name != "signal_extract":
-            random.shuffle(chunks)
-        return question, chunks
-def count_tokens(text: str) -> int:
-    """Standard token counter for efficiency rewards."""
-    return len(text.split())

openenv.yaml CHANGED Viewed

@@ -14,5 +14,5 @@ tasks:
 action_space: ["inspect_artifact", "prioritize_artifact", "summarize_artifact", "set_resolution_plan", "submit_report"]
 observation_space: ["case_summary", "objective", "workflow_stage", "available_artifacts", "reviewed_artifacts", "prioritized_artifacts", "plan_draft", "total_tokens_used", "token_budget"]
 reward_range: [0.0, 1.0]
-port: 8000
 app: app.py

 action_space: ["inspect_artifact", "prioritize_artifact", "summarize_artifact", "set_resolution_plan", "submit_report"]
 observation_space: ["case_summary", "objective", "workflow_stage", "available_artifacts", "reviewed_artifacts", "prioritized_artifacts", "plan_draft", "total_tokens_used", "token_budget"]
 reward_range: [0.0, 1.0]
+port: 7860
 app: app.py

rag_gc_env/__init__.py DELETED Viewed

@@ -1,11 +0,0 @@
-from rag_gc_env.models import RAGGCAction, RAGGCObservation, RAGGCReward, RAGGCState
-from rag_gc_env.environment import RAGGCEnvironment
-__all__ = [
-    "RAGGCAction",
-    "RAGGCObservation",
-    "RAGGCReward",
-    "RAGGCState",
-    "RAGGCEnvironment",
-]

rag_gc_env/__pycache__/__init__.cpython-311.pyc DELETED Viewed

Binary file (440 Bytes)

rag_gc_env/__pycache__/environment.cpython-311.pyc DELETED Viewed

Binary file (9.34 kB)

rag_gc_env/__pycache__/grader.cpython-311.pyc DELETED Viewed

Binary file (2.75 kB)

rag_gc_env/__pycache__/inference.cpython-311.pyc DELETED Viewed

Binary file (2.66 kB)

rag_gc_env/__pycache__/models.cpython-311.pyc DELETED Viewed

Binary file (3.51 kB)

rag_gc_env/__pycache__/rewards.cpython-311.pyc DELETED Viewed

Binary file (3.92 kB)

rag_gc_env/__pycache__/tasks.cpython-311.pyc DELETED Viewed

Binary file (5.11 kB)

rag_gc_env/environment.py DELETED Viewed

@@ -1,187 +0,0 @@
-from __future__ import annotations
-from typing import Any, Optional
-from uuid import uuid4
-from openenv.core.env_server.interfaces import Environment
-from rag_gc_env.grader import grade_context
-from rag_gc_env.models import DocumentItem, RAGGCAction, RAGGCObservation, RAGGCReward, RAGGCState
-from rag_gc_env.rewards import step_reward, summarize_deterministic
-from rag_gc_env.tasks import ALL_TASKS, TaskSpec, task_by_seed
-class RAGGCEnvironment(Environment[RAGGCAction, RAGGCObservation, RAGGCState]):
-    SUPPORTS_CONCURRENT_SESSIONS = True
-    def __init__(self) -> None:
-        super().__init__(transform=None, rubric=None)
-        self._state = RAGGCState(episode_id=str(uuid4()), step_count=0)
-        self._task: TaskSpec = task_by_seed(0)
-        self._docs: dict[str, DocumentItem] = {}
-        self._removed_critical = False
-    def _load_task(self, spec: TaskSpec) -> None:
-        self._docs = {}
-        for did, text, tok, _meta in spec.documents:
-            self._docs[did] = DocumentItem(document_id=did, text=text, tokens=tok)
-    def reset(
-        self,
-        seed: Optional[int] = None,
-        episode_id: Optional[str] = None,
-        task_name: Optional[str] = None,
-        **kwargs: Any,
-    ) -> RAGGCObservation:
-        self._reset_rubric()
-        sid = episode_id or str(uuid4())
-        if task_name and task_name in ALL_TASKS:
-            self._task = ALL_TASKS[task_name]
-        elif seed is not None:
-            self._task = task_by_seed(int(seed))
-        else:
-            self._task = task_by_seed(0)
-        self._load_task(self._task)
-        self._removed_critical = False
-        self._state = RAGGCState(
-            episode_id=sid,
-            step_count=0,
-            task_name=self._task.name,
-            max_steps=64,
-            removed_critical=False,
-            submitted=False,
-        )
-        return self._observe(done=False, reward_value=0.0, msg="ready")
-    def _total_tokens(self) -> int:
-        return sum(d.tokens for d in self._docs.values())
-    def _observe(
-        self,
-        done: bool,
-        reward_value: float,
-        msg: str,
-        reward_detail: Optional[RAGGCReward] = None,
-        grader_score: Optional[float] = None,
-    ) -> RAGGCObservation:
-        docs = sorted(self._docs.values(), key=lambda x: x.document_id)
-        return RAGGCObservation(
-            done=done,
-            reward=reward_value,
-            query=self._task.query,
-            documents=docs,
-            token_count=self._total_tokens(),
-            token_budget=self._task.token_budget,
-            task_name=self._task.name,
-            message=msg,
-            grader_score=grader_score,
-            reward_detail=reward_detail,
-            metadata={
-                "relevance": {
-                    row[0]: row[3].get("relevance", 0.5)
-                    for row in self._task.documents
-                    if row[0] in self._docs
-                },
-                "hints": {row[0]: row[3].get("hint", "") for row in self._task.documents},
-            },
-        )
-    def step(
-        self,
-        action: RAGGCAction,
-        timeout_s: Optional[float] = None,
-        **kwargs: Any,
-    ) -> RAGGCObservation:
-        self._state.step_count += 1
-        docs_before = dict(self._docs)
-        if action.verb == "submit":
-            score = grade_context(self._task, list(self._docs.values()))
-            self._state.submitted = True
-            r = RAGGCReward(
-                step_reward=score,
-                final_score=score,
-            )
-            obs = self._observe(
-                done=True,
-                reward_value=score,
-                msg="submitted",
-                reward_detail=r,
-                grader_score=score,
-            )
-            return self._apply_transform(obs)
-        if action.document_id is None or action.document_id not in self._docs:
-            obs = self._observe(
-                done=False,
-                reward_value=-0.1,
-                msg="unknown_document",
-            )
-            return self._apply_transform(obs)
-        did = action.document_id
-        removed_critical = False
-        if action.verb == "delete":
-            if did in self._task.critical_document_ids:
-                self._removed_critical = True
-                removed_critical = True
-            self._docs.pop(did, None)
-        elif action.verb == "keep":
-            pass
-        elif action.verb == "summarize":
-            item = self._docs[did]
-            new_text, new_tok = summarize_deterministic(item.text)
-            self._docs[did] = DocumentItem(
-                document_id=did,
-                text=new_text,
-                tokens=new_tok,
-            )
-            if did in self._task.critical_document_ids:
-                for p in self._task.required_phrases:
-                    if p not in new_text:
-                        self._removed_critical = True
-                        removed_critical = True
-        rdetail = step_reward(
-            self._task,
-            action.verb,
-            did,
-            docs_before,
-            self._docs,
-            removed_critical,
-        )
-        self._state.removed_critical = self._removed_critical
-        over = self._total_tokens() > self._task.token_budget
-        if over:
-            penalty = -0.08 * (self._total_tokens() - self._task.token_budget)
-            rdetail.token_penalty += penalty
-            rdetail.step_reward += penalty
-        done = self._state.step_count >= self._state.max_steps
-        final_score: Optional[float] = None
-        if done:
-            final_score = grade_context(self._task, list(self._docs.values()))
-            rdetail.final_score = final_score
-            rdetail.step_reward += final_score * 0.5
-        reward_val = rdetail.step_reward
-        if done:
-            # When done, the reward is primarily the final grader score,
-            # but we can preserve the step-specific bonus we added.
-            # final_score is the main signal.
-            reward_val = final_score if final_score is not None else rdetail.step_reward
-        obs = self._observe(
-            done=done,
-            reward_value=reward_val,
-            msg="over_budget" if over else ("graded" if done else "ok"),
-            reward_detail=rdetail,
-            grader_score=final_score if done else None,
-        )
-        return self._apply_transform(obs)
-    @property
-    def state(self) -> RAGGCState:
-        return self._state

rag_gc_env/grader.py DELETED Viewed

@@ -1,43 +0,0 @@
-from __future__ import annotations
-from rag_gc_env.models import DocumentItem
-from rag_gc_env.tasks import TaskSpec
-def _joined_text(docs: list[DocumentItem]) -> str:
-    return " ".join(d.text for d in docs)
-def grade_context(task: TaskSpec, final_documents: list[DocumentItem]) -> float:
-    """
-    Deterministic score in [0.0, 0.5, 1.0]:
-    1.0 — required facts present, budget respected, efficient (near optimal tokens)
-    0.5 — required facts present but inefficient or borderline budget
-    0.0 — missing facts, forbidden content present, or critical docs removed incorrectly
-    """
-    text = _joined_text(final_documents)
-    total_tokens = sum(d.tokens for d in final_documents)
-    for phrase in task.required_phrases:
-        if phrase not in text:
-            return 0.0
-    for phrase in task.forbidden_phrases:
-        if phrase in text:
-            return 0.0
-    for pid in task.poison_document_ids:
-        still = any(d.document_id == pid for d in final_documents)
-        if still:
-            return 0.0
-    if total_tokens > task.token_budget:
-        return 0.0
-    if not task.critical_document_ids.issubset({d.document_id for d in final_documents}):
-        return 0.0
-    if total_tokens <= task.optimal_max_tokens:
-        return 1.0
-    return 0.5

rag_gc_env/inference.py DELETED Viewed

@@ -1,49 +0,0 @@
-"""
-Reproducible baseline policy for Adaptive Context Optimization (RAG GC).
-Deterministic: fixed action sequences per task derived from metadata.
-"""
-from __future__ import annotations
-from rag_gc_env.environment import RAGGCEnvironment
-from rag_gc_env.models import RAGGCAction
-def run_baseline(task_name: str, seed: int = 0) -> tuple[float, list[str]]:
-    env = RAGGCEnvironment()
-    obs = env.reset(seed=seed, task_name=task_name)
-    log: list[str] = ["reset"]
-    def step(verb: str, doc_id: str | None) -> None:
-        nonlocal obs
-        obs = env.step(RAGGCAction(verb=verb, document_id=doc_id))
-        log.append(f"{verb}:{doc_id}")
-    if task_name == "easy_irrelevant_removal":
-        step("delete", "d1")
-        step("submit", None)
-    elif task_name == "medium_token_compression":
-        step("delete", "m2")
-        while obs.token_count > obs.token_budget and not obs.done:
-            step("summarize", "m0")
-            if len(log) > 40:
-                break
-        step("submit", None)
-    elif task_name == "hard_contradiction_removal":
-        step("delete", "h1")
-        step("submit", None)
-    else:
-        step("submit", None)
-    score = float(obs.grader_score or obs.reward or 0.0)
-    return score, log
-if __name__ == "__main__":
-    for name in (
-        "easy_irrelevant_removal",
-        "medium_token_compression",
-        "hard_contradiction_removal",
-    ):
-        s, lg = run_baseline(name, seed=0)
-        print(name, "score=", s, "trace=", lg)

rag_gc_env/models.py DELETED Viewed

@@ -1,53 +0,0 @@
-from __future__ import annotations
-from typing import Any, Literal, Optional
-from openenv.core.env_server.types import Action, Observation, State
-from pydantic import BaseModel, Field
-class DocumentItem(BaseModel):
-    document_id: str
-    text: str
-    tokens: int = Field(description="Estimated tokens for this snippet")
-class RAGGCAction(Action):
-    verb: Literal["keep", "delete", "summarize", "submit"] = Field(
-        description="Document operation or submit to finalize and grade"
-    )
-    document_id: Optional[str] = Field(
-        default=None,
-        description="Target document for keep/delete/summarize; omit for submit",
-    )
-class RAGGCReward(BaseModel):
-    step_reward: float = 0.0
-    relevance: float = 0.0
-    compression: float = 0.0
-    token_penalty: float = 0.0
-    critical_penalty: float = 0.0
-    final_score: Optional[float] = Field(
-        default=None, description="0.0–1.0 after submit; aligns with grader"
-    )
-class RAGGCObservation(Observation):
-    query: str = ""
-    documents: list[DocumentItem] = Field(default_factory=list)
-    token_count: int = 0
-    token_budget: int = 0
-    task_name: str = ""
-    reward_detail: Optional[RAGGCReward] = None
-    message: str = ""
-    grader_score: Optional[float] = Field(
-        default=None, description="Deterministic score after episode ends"
-    )
-class RAGGCState(State):
-    task_name: str = ""
-    max_steps: int = 64
-    removed_critical: bool = False
-    submitted: bool = False

rag_gc_env/rewards.py DELETED Viewed

@@ -1,89 +0,0 @@
-from __future__ import annotations
-from rag_gc_env.models import DocumentItem, RAGGCReward
-from rag_gc_env.tasks import TaskSpec
-def summarize_deterministic(text: str) -> tuple[str, int]:
-    """Deterministic compression: first sentence or capped prefix."""
-    stripped = text.strip()
-    if not stripped:
-        return "", 1
-    cut = stripped.split(". ")
-    first = cut[0] + ("." if not cut[0].endswith(".") else "")
-    if len(first) < 40 and len(cut) > 1:
-        first = cut[0] + ". " + cut[1] + ("." if not cut[1].endswith(".") else "")
-    cap = 280
-    out = first[:cap] + ("..." if len(first) > cap else "")
-    tokens = max(1, len(out) // 4)
-    return out, tokens
-def estimate_tokens(text: str) -> int:
-    return max(1, len(text) // 4)
-def step_reward(
-    task: TaskSpec,
-    verb: str,
-    doc_id: str | None,
-    docs_before: dict[str, DocumentItem],
-    docs_after: dict[str, DocumentItem],
-    removed_critical_flag: bool,
-) -> RAGGCReward:
-    rel = 0.0
-    comp = 0.0
-    tok_pen = 0.0
-    crit = 0.0
-    if removed_critical_flag:
-        crit = -3.0
-    if verb == "delete" and doc_id in docs_before:
-        meta = next(
-            (m for did, _, _, m in task.documents if did == doc_id),
-            {},
-        )
-        # Reward deleting irrelevant or poison documents
-        if doc_id in task.irrelevant_document_ids:
-            rel += 0.4
-        elif doc_id in task.poison_document_ids:
-            rel += 0.6
-        elif doc_id in task.critical_document_ids:
-            crit -= 3.0
-        elif meta.get("hint") == "fluff":
-            rel += 0.2
-        # Deleting tokens should NOT result in a penalty proportional to the deleted tokens;
-        # instead, it removes the 'keep' penalty they would have incurred.
-        # We can add a small constant 'action cost' for deleting if desired, but 0.0 is fine here.
-        tok_pen = 0.0
-    if verb == "summarize" and doc_id in docs_before:
-        before_t = docs_before[doc_id].tokens
-        after = docs_after.get(doc_id)
-        if after is not None:
-            # Reward for the reduction in size (efficiency)
-            reduction_ratio = (before_t - after.tokens) / max(before_t, 1)
-            comp += 0.3 * max(0.0, reduction_ratio)
-            # The remaining tokens still incur a small penalty
-            tok_pen -= 0.01 * after.tokens
-            if doc_id in task.critical_document_ids:
-                for p in task.required_phrases:
-                    if p not in after.text:
-                        crit -= 2.5
-    if verb == "keep" and doc_id in docs_before:
-        # Standard penalty for keeping tokens in context
-        tok_pen -= 0.01 * docs_before[doc_id].tokens
-    step = rel + comp + tok_pen + crit
-    return RAGGCReward(
-        step_reward=step,
-        relevance=rel,
-        compression=comp,
-        token_penalty=tok_pen,
-        critical_penalty=crit,
-    )

rag_gc_env/server/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-# Server package for OpenEnv HTTP deployment

rag_gc_env/server/__pycache__/__init__.cpython-311.pyc DELETED Viewed

Binary file (154 Bytes)

rag_gc_env/server/__pycache__/app.cpython-311.pyc DELETED Viewed

Binary file (1.04 kB)

rag_gc_env/server/app.py DELETED Viewed

@@ -1,23 +0,0 @@
-import os
-from openenv.core.env_server.http_server import create_fastapi_app
-from rag_gc_env.environment import RAGGCEnvironment
-from rag_gc_env.models import RAGGCAction, RAGGCObservation
-app = create_fastapi_app(
-    RAGGCEnvironment,
-    RAGGCAction,
-    RAGGCObservation,
-)
-def main() -> None:
-    import uvicorn
-    port = int(os.environ.get("PORT", "8000"))
-    uvicorn.run(app, host="0.0.0.0", port=port)
-if __name__ == "__main__":
-    main()

rag_gc_env/tasks.py DELETED Viewed

@@ -1,144 +0,0 @@
-from __future__ import annotations
-from dataclasses import dataclass, field
-from typing import Any, FrozenSet
-@dataclass(frozen=True)
-class TaskSpec:
-    name: str
-    query: str
-    token_budget: int
-    documents: list[tuple[str, str, int, dict[str, Any]]]
-    # document_id, text, tokens, metadata (relevance, flags)
-    required_phrases: FrozenSet[str] = field(default_factory=frozenset)
-    forbidden_phrases: FrozenSet[str] = field(default_factory=frozenset)
-    critical_document_ids: FrozenSet[str] = field(default_factory=frozenset)
-    irrelevant_document_ids: FrozenSet[str] = field(default_factory=frozenset)
-    poison_document_ids: FrozenSet[str] = field(default_factory=frozenset)
-    optimal_max_tokens: int = 0
-def _docs(
-    rows: list[tuple[str, str, int, dict[str, Any]]],
-) -> list[tuple[str, str, int, dict[str, Any]]]:
-    return rows
-TASK_EASY = TaskSpec(
-    name="easy_irrelevant_removal",
-    query="What is the capital city of France?",
-    token_budget=400,
-    documents=_docs(
-        [
-            (
-                "d0",
-                "Paris has been the capital of France since political centralization in the country.",
-                24,
-                {"relevance": 0.95, "hint": "high"},
-            ),
-            (
-                "d1",
-                "Penguins thrive in Antarctica and are unrelated to European geography.",
-                18,
-                {"relevance": 0.08, "hint": "noise"},
-            ),
-            (
-                "d2",
-                "Lyon is a major French city but not the national capital.",
-                16,
-                {"relevance": 0.55, "hint": "partial"},
-            ),
-        ]
-    ),
-    required_phrases=frozenset({"Paris"}),
-    forbidden_phrases=frozenset(),
-    critical_document_ids=frozenset({"d0"}),
-    irrelevant_document_ids=frozenset({"d1"}),
-    poison_document_ids=frozenset(),
-    optimal_max_tokens=120,
-)
-_LONG_DUP = (
-    "Paris is the capital of France. " * 18
-    + "This repetition exists only to inflate token usage for compression tests."
-)
-TASK_MEDIUM = TaskSpec(
-    name="medium_token_compression",
-    query="Which city is the capital of France?",
-    token_budget=120,
-    documents=_docs(
-        [
-            (
-                "m0",
-                _LONG_DUP,
-                max(1, len(_LONG_DUP) // 4),
-                {"relevance": 0.9, "hint": "verbose"},
-            ),
-            (
-                "m1",
-                "Administrative records list a capital city but this line omits the name intentionally.",
-                14,
-                {"relevance": 0.55, "hint": "no_answer"},
-            ),
-            (
-                "m2",
-                "French cuisine is diverse; it does not change which city is the capital.",
-                14,
-                {"relevance": 0.35, "hint": "fluff"},
-            ),
-        ]
-    ),
-    required_phrases=frozenset({"Paris"}),
-    forbidden_phrases=frozenset(),
-    critical_document_ids=frozenset(),
-    irrelevant_document_ids=frozenset({"m2"}),
-    poison_document_ids=frozenset(),
-    optimal_max_tokens=90,
-)
-TASK_HARD = TaskSpec(
-    name="hard_contradiction_removal",
-    query="Compute the integer result of 17 + 25.",
-    token_budget=350,
-    documents=_docs(
-        [
-            (
-                "h0",
-                "Arithmetic trace: 17 + 25 = 42.",
-                12,
-                {"relevance": 0.9, "hint": "consistent", "trust": 0.95},
-            ),
-            (
-                "h1",
-                "Quick math note: 17 + 25 equals 43 for budgeting purposes.",
-                14,
-                {"relevance": 0.88, "hint": "poison", "trust": 0.2},
-            ),
-            (
-                "h2",
-                "Addition of integers is associative and commutative.",
-                10,
-                {"relevance": 0.4, "hint": "generic"},
-            ),
-        ]
-    ),
-    required_phrases=frozenset({"42"}),
-    forbidden_phrases=frozenset({"43"}),
-    critical_document_ids=frozenset({"h0"}),
-    irrelevant_document_ids=frozenset(),
-    poison_document_ids=frozenset({"h1"}),
-    optimal_max_tokens=200,
-)
-ALL_TASKS: dict[str, TaskSpec] = {
-    TASK_EASY.name: TASK_EASY,
-    TASK_MEDIUM.name: TASK_MEDIUM,
-    TASK_HARD.name: TASK_HARD,
-}
-def task_by_seed(seed: int) -> TaskSpec:
-    order = [TASK_EASY, TASK_MEDIUM, TASK_HARD]
-    return order[seed % 3]