prithic07 commited on
Commit
222f8ce
·
1 Parent(s): f7594d7

refactor: Migrate to Gemini 1.5 Flash exclusively for pruning and validation

Browse files
Files changed (6) hide show
  1. .dockerignore +14 -7
  2. .gitignore +42 -0
  3. README.md +3 -0
  4. app_ui.py +155 -0
  5. inference.py +23 -15
  6. requirements.txt +4 -1
.dockerignore CHANGED
@@ -1,8 +1,15 @@
1
- __pycache__
 
 
 
2
  *.pyc
3
- .git
4
- .venv
5
- venv
6
- *.md
7
- .pytest_cache
8
- .mypy_cache
 
 
 
 
 
1
+ # Docker Ignore
2
+ .env
3
+ .git/
4
+ __pycache__/
5
  *.pyc
6
+ .pytest_cache/
7
+ .vscode/
8
+ .idea/
9
+ venv/
10
+ .venv/
11
+ README.md
12
+ walkthrough.md
13
+ task.md
14
+ implementation_plan.md
15
+ # NOTE(review): machine-specific absolute path — .dockerignore entries are
+ # matched relative to the build context, so this line has no effect and it
+ # leaks a local username; remove it.
+ C:/Users/prith/.gemini/antigravity/brain/
.gitignore ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API Keys & Sensitive Info
2
+ .env
3
+ *.pem
4
+ *.key
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ env/
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+
29
+ # Virtual Environments
30
+ venv/
31
+ .venv/
32
+ ENV/
33
+
34
+ # Testing
35
+ .pytest_cache/
36
+ .coverage
37
+ htmlcov/
38
+
39
+ # IDEs
40
+ .vscode/
41
+ .idea/
42
+ .DS_Store
README.md CHANGED
@@ -20,6 +20,9 @@
20
  # Install dependencies
21
  pip install -r requirements.txt
22
 
 
 
 
23
  # Verify the environment and task logic
24
  pytest test_tasks.py
25
  ```
 
20
  # Install dependencies
21
  pip install -r requirements.txt
22
 
23
+ # Set your Gemini API Key
24
+ export GOOGLE_API_KEY=your_key_here
25
+
26
  # Verify the environment and task logic
27
  pytest test_tasks.py
28
  ```
app_ui.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import asyncio
5
+ import gradio as gr
6
+ import google.generativeai as genai
7
+ from dotenv import load_dotenv
8
+
9
+ # Load API keys from .env
10
+ load_dotenv()
11
+ from typing import List, Tuple
12
+ from context_pruning_env.utils import count_tokens
13
+
14
+ # --- Configuration ---
15
+ # Set these in your environment or replace with mock keys for testing
16
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
17
+ if GOOGLE_API_KEY:
18
+ genai.configure(api_key=GOOGLE_API_KEY)
19
+
20
+ # --- Core Logic ---
21
+
22
async def call_gemini(prompt: str, model_name: str = "gemini-1.5-flash") -> str:
    """Send *prompt* to the Gemini API and return the response text.

    Failures (missing API key, SDK/transport errors, blocked responses) are
    reported as a string starting with "ERROR:" instead of being raised, so
    the UI can display them directly.
    """
    if not GOOGLE_API_KEY:
        return "ERROR: GOOGLE_API_KEY not found."
    try:
        # .text is accessed inside the try: it can raise for blocked responses.
        client = genai.GenerativeModel(model_name)
        reply = await client.generate_content_async(prompt)
        return reply.text
    except Exception as exc:
        return f"ERROR: {str(exc)}"
32
+
33
+ def chunk_text(text: str, max_chunks: int = 5) -> List[str]:
34
+ """Split text into manageable chunks (paragraphs or sentences)."""
35
+ # Split by double newline first
36
+ chunks = [c.strip() for c in re.split(r'\n\s*\n', text) if c.strip()]
37
+ if len(chunks) < 2:
38
+ # Split by sentence if only one paragraph
39
+ chunks = [c.strip() for c in re.split(r'(?<=[.!?])\s+', text) if c.strip()]
40
+
41
+ # Simple limit to 5-10 chunks for the demo
42
+ return chunks[:10]
43
+
44
async def prune_context(query: str, raw_text: str) -> Tuple[str, dict, str]:
    """Chunk raw context, ask Gemini which chunks to keep, and score the result.

    Pipeline: chunk the text -> LLM selects relevant chunk indices ->
    reassemble kept chunks -> compute size metrics -> run a groundedness
    check on the pruned context.

    Args:
        query: The user's question.
        raw_text: The raw (possibly noisy) context to prune.

    Returns:
        Tuple of (optimized context text, metrics dict, groundedness verdict).
    """
    if not query or not raw_text:
        return "Please provide both query and raw context.", {}, ""

    chunks = chunk_text(raw_text)

    # Ask the model for a JSON list of chunk indices to keep.
    selection_prompt = (
        f"Query: {query}\n\n"
        "Below are several context chunks. Identify which are RELEVANT and which are NOISE or DUPLICATES. "
        "Output a JSON list of indices (0-indexed) of the chunks to KEEP.\n"
        "Example output: [0, 2, 3]\n\n"
        "Chunks:\n"
    )
    for i, c in enumerate(chunks):
        selection_prompt += f"Chunk {i}: {c}\n\n"

    raw_response = await call_gemini(selection_prompt)

    # Extract the bracketed index list from the free-form model output.
    # Fall back to keeping everything if parsing fails or selects nothing.
    kept_chunks = chunks
    match = re.search(r"\[([\d\s,]+)\]", raw_response)
    if match:
        try:
            indices = json.loads(f"[{match.group(1)}]")
        except ValueError:
            # Bug fix: was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit. json.loads failures raise
            # JSONDecodeError, a ValueError subclass.
            indices = None
        if indices:
            # The regex admits only non-negative integers, but guard both
            # bounds so out-of-range indices cannot raise IndexError.
            selected = [chunks[i] for i in indices if 0 <= i < len(chunks)]
            # Robustness fix: previously an all-invalid/empty selection
            # produced an empty optimized context; keep everything instead.
            if selected:
                kept_chunks = selected

    optimized_text = " ".join(kept_chunks)

    # Size metrics (labels mirror the UI fields; count_tokens comes from
    # context_pruning_env.utils).
    orig_tokens = count_tokens(raw_text)
    final_tokens = count_tokens(optimized_text)
    reduction = ((orig_tokens - final_tokens) / orig_tokens * 100) if orig_tokens > 0 else 0

    metrics = {
        "Original Word Count": f"{orig_tokens} words",
        "Final Word Count": f"{final_tokens} words",
        "Reduction": f"{reduction:.1f}%"
    }

    # Second LLM pass: verify the pruned context can still answer the query.
    groundedness_prompt = (
        f"Question: {query}\n"
        f"Context: {optimized_text}\n\n"
        "Task: Check if the context contains enough information to answer the question. "
        "Respond with 'PASS' or 'FAIL' followed by a one-sentence reasoning."
    )
    ground_result = await call_gemini(groundedness_prompt)

    return optimized_text, metrics, ground_result
100
+
101
+ # --- UI Components ---
102
+
103
def get_status_html(result: str):
    """Render the groundedness verdict as a colored HTML banner.

    A verdict containing "PASS" gets a green banner, "FAIL" a red one
    (PASS wins if both appear); anything else is shown in a neutral box.
    """
    verdict = result.upper()
    if "PASS" in verdict:
        body = result.replace("PASS", "").strip()
        style = "background-color: #d1fae5; color: #065f46; padding: 10px; border-radius: 8px; border: 1px solid #10b981; font-weight: bold;"
        return f'<div style="{style}">✅ GROUNDEDNESS PASS: {body}</div>'
    if "FAIL" in verdict:
        body = result.replace("FAIL", "").strip()
        style = "background-color: #fee2e2; color: #991b1b; padding: 10px; border-radius: 8px; border: 1px solid #ef4444; font-weight: bold;"
        return f'<div style="{style}">❌ GROUNDEDNESS FAIL: {body}</div>'
    return f'<div style="background-color: #f3f4f6; padding: 10px; border-radius: 8px;">{result}</div>'
109
+
110
# --- UI layout: two-column Gradio app wired to the async pruning pipeline ---
with gr.Blocks(theme=gr.themes.Soft(), title="ContextPrune | Adaptive Context Optimization") as demo:
    gr.Markdown("""
    # 🧠 ContextPrune
    ### Adaptive Context Optimization Agent
    *Reduce noise and tokens in RAG pipelines while preserving answer quality.*
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: query + noisy context, pre-filled with a demo example.
            query_input = gr.Textbox(
                label="User Query",
                placeholder="e.g., When was the Eiffel Tower built?",
                value="Who was the first person to walk on the moon?",
            )
            context_input = gr.Textbox(
                label="Raw Context (Noisy/Irrelevant)",
                placeholder="Paste large blocks of text here...",
                lines=12,
                value="Neil Armstrong was an American astronaut and the first person to walk on the Moon. He was also a naval aviator, test pilot, and university professor. [IGNORE THIS] The sky is sometimes blue but often grey in London. Neil Armstrong set foot on the moon in 1969. Some say the moon is made of cheese, but that is a myth. Neil Armstrong was the first person to walk on the moon.",
            )
            submit_btn = gr.Button("Optimize Context", variant="primary")

        with gr.Column(scale=1):
            # Outputs: pruned context plus the groundedness verdict banner.
            optimized_output = gr.Textbox(label="Optimized Context", lines=10, interactive=False)
            status_output = gr.HTML(label="Groundedness Check")

    with gr.Row():
        word_count_orig = gr.Label(label="Original Word Count")
        word_count_final = gr.Label(label="Final Word Count")
        reduction_pct = gr.Label(label="% Token Reduction")

    def process(query, context):
        """Synchronous Gradio callback: run the async pipeline, unpack outputs.

        Bug fix: the previous version created a fresh event loop per click via
        ``asyncio.new_event_loop()`` and never closed it, leaking a loop (and
        its resources) on every submission. ``asyncio.run()`` creates and
        closes the loop for us.
        """
        opt_text, metrics, ground = asyncio.run(prune_context(query, context))

        status_html = get_status_html(ground)

        return (
            opt_text,
            status_html,
            metrics.get("Original Word Count", "0"),
            metrics.get("Final Word Count", "0"),
            metrics.get("Reduction", "0%"),
        )

    submit_btn.click(
        process,
        inputs=[query_input, context_input],
        outputs=[optimized_output, status_output, word_count_orig, word_count_final, reduction_pct],
    )

if __name__ == "__main__":
    # Fixed port so the local demo URL is predictable.
    demo.launch(server_port=7861)
inference.py CHANGED
@@ -1,18 +1,31 @@
1
  import os
2
  import json
3
  import logging
4
- from openai import OpenAI
 
 
5
  from context_pruning_env.env import ContextPruningEnv
 
 
 
6
  from context_pruning_env.models import ContextAction
7
 
8
  # Setup simple logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
 
 
 
 
 
12
  def main():
13
- # 1. Setup OpenAI Client
14
- # Ensure OPENAI_API_KEY is set in your environment
15
- client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"))
 
 
 
16
 
17
  # 2. Initialize Environment
18
  env = ContextPruningEnv(squad_split="train")
@@ -27,11 +40,10 @@ def main():
27
  obs = env.reset(task_name=task_name)
28
  print(f"<OBSERVATION>{obs.model_dump_json()}</OBSERVATION>")
29
 
30
- # 4. Agent Logic (LLM Call)
31
- # Construct prompt for the model
32
  prompt = (
33
  f"Question: {obs.question}\n\n"
34
- "Below are 5 context chunks. Output a JSON list of 5 integers (0 or 1) "
35
  "where 1 means 'keep' and 0 means 'prune'. "
36
  "Prioritize keeping the answer while removing noise and duplicates.\n"
37
  f"Chunks: {json.dumps(obs.chunks, indent=2)}\n\n"
@@ -39,22 +51,18 @@ def main():
39
  )
40
 
41
  try:
42
- response = client.chat.completions.create(
43
- model="gpt-4o", # or gpt-3.5-turbo
44
- messages=[{"role": "user", "content": prompt}]
45
- )
46
- completion = response.choices[0].message.content
47
 
48
  # Simple extraction of the mask [x,x,x,x,x]
49
- import re
50
  match = re.search(r"\[\s*([01])\s*,\s*([01])\s*,\s*([01])\s*,\s*([01])\s*,\s*([01])\s*\]", completion)
51
  if match:
52
  mask = [int(m) for m in match.groups()]
53
  else:
54
- logger.warning("Failed to parse mask from LLM output, falling back to [1,1,1,1,1]")
55
  mask = [1, 1, 1, 1, 1]
56
  except Exception as e:
57
- logger.error(f"LLM Inference failed: {e}")
58
  mask = [1, 1, 1, 1, 1]
59
 
60
  # 5. Take Action
 
1
  import os
2
  import json
3
  import logging
4
+ import re
5
+ import google.generativeai as genai
6
+ from dotenv import load_dotenv
7
  from context_pruning_env.env import ContextPruningEnv
8
+
9
+ # Load API keys from .env
10
+ load_dotenv()
11
  from context_pruning_env.models import ContextAction
12
 
13
  # Setup simple logging
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
+ # Configure Gemini
18
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
19
+ if GOOGLE_API_KEY:
20
+ genai.configure(api_key=GOOGLE_API_KEY)
21
+
22
  def main():
23
+ if not GOOGLE_API_KEY:
24
+ logger.error("GOOGLE_API_KEY not found in environment or .env file.")
25
+ return
26
+
27
+ # 1. Setup Gemini Model
28
+ model = genai.GenerativeModel("gemini-1.5-flash")
29
 
30
  # 2. Initialize Environment
31
  env = ContextPruningEnv(squad_split="train")
 
40
  obs = env.reset(task_name=task_name)
41
  print(f"<OBSERVATION>{obs.model_dump_json()}</OBSERVATION>")
42
 
43
+ # 4. Agent Logic (Gemini Call)
 
44
  prompt = (
45
  f"Question: {obs.question}\n\n"
46
+ "Below are 5 context chunks. Output ONLY a JSON list of 5 integers (0 or 1) "
47
  "where 1 means 'keep' and 0 means 'prune'. "
48
  "Prioritize keeping the answer while removing noise and duplicates.\n"
49
  f"Chunks: {json.dumps(obs.chunks, indent=2)}\n\n"
 
51
  )
52
 
53
  try:
54
+ response = model.generate_content(prompt)
55
+ completion = response.text
 
 
 
56
 
57
  # Simple extraction of the mask [x,x,x,x,x]
 
58
  match = re.search(r"\[\s*([01])\s*,\s*([01])\s*,\s*([01])\s*,\s*([01])\s*,\s*([01])\s*\]", completion)
59
  if match:
60
  mask = [int(m) for m in match.groups()]
61
  else:
62
+ logger.warning(f"Failed to parse mask from Gemini output: {completion}. Falling back to [1,1,1,1,1]")
63
  mask = [1, 1, 1, 1, 1]
64
  except Exception as e:
65
+ logger.error(f"Gemini Inference failed: {e}")
66
  mask = [1, 1, 1, 1, 1]
67
 
68
  # 5. Take Action
requirements.txt CHANGED
@@ -7,5 +7,8 @@ datasets>=2.15.0
7
  transformers>=4.35.0
8
  trl>=0.7.4
9
  torch>=2.1.0
10
- openai>=1.5.0
11
  pytest>=7.4.0
 
 
 
 
7
  transformers>=4.35.0
8
  trl>=0.7.4
9
  torch>=2.1.0
10
+ python-dotenv>=1.0.0
11
  pytest>=7.4.0
12
+ gradio>=4.0.0
13
+ google-generativeai>=0.3.0
14
+ python-dotenv>=1.0.0  # NOTE(review): duplicate — python-dotenv is already listed on line 10 above; keep only one entry