Spaces:

ayushKishor
/

PR_Review_Negotiation_Environment

Running

App Files Files Community

3v324v23 commited on 13 days ago

Commit

b0144a1

1 Parent(s): 3a5834e

FINAL: 11-bug fix — scoped CSS, session_state persistence, error handling, diff renderer, binary safety

Browse files

Files changed (1) hide show

app.py +448 -123

app.py CHANGED Viewed

@@ -1,6 +1,18 @@
 """
-PR Review Command Center — Final Studio Grade Revision
-Theme: Ultra-Clean, High-Contrast & Secure
 """
 import streamlit as st
@@ -9,7 +21,9 @@ import os
 import httpx
 from openai import OpenAI
-# ─── Page Config ──────────────────────────────────────────────────────────────
 st.set_page_config(
     page_title="PR Command Center",
     page_icon="🛡️",
@@ -17,28 +31,38 @@ st.set_page_config(
     initial_sidebar_state="expanded",
 )
-# ─── Professional CSS Core ────────────────────────────────────────────────────
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&family=Fira+Code:wght@400;500&display=swap');
-.stApp { background-color: #f8fafc; font-family: 'Inter', sans-serif; }
-/* Sidebar: Professional Deep Blue */
 [data-testid="stSidebar"] {
     background-color: #ffffff !important;
     border-right: 1px solid #e2e8f0;
 }
-/* Deep High Contrast Text */
-h1, h2, h3, p, span, label { color: #0f172a !important; }
 .section-label {
-    font-size: 0.75rem; font-weight: 800; color: #1e293b !important;
-    text-transform: uppercase; letter-spacing: 0.05rem;
-    margin-top: 2rem; border-bottom: 2px solid #f1f5f9; padding-bottom: 4px;
 }
-/* Glass Metric Cards */
 .metric-card {
     background: #ffffff;
     border: 1px solid #e2e8f0;
@@ -47,30 +71,40 @@ h1, h2, h3, p, span, label { color: #0f172a !important; }
     box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05);
 }
-/* Badge System */
-.badge { padding: 6px 14px; border-radius: 20px; font-size: 0.85rem; font-weight: 800; border: 1px solid transparent; }
-.badge-approve  { background: #dcfce7; color: #166534 !important; border-color: #bbf7d0; }
-.badge-request  { background: #fee2e2; color: #991b1b !important; border-color: #fecaca; }
-.badge-escalate { background: #fef9c3; color: #854d0e !important; border-color: #fef08a; }
-.badge-running  { background: #dbeafe; color: #1e40af !important; border-color: #bfdbfe; }
-/* Timeline Bubbles */
-.bubble { padding: 1.5rem; border-radius: 12px; font-size: 0.95rem; line-height: 1.6; margin-bottom: 1.5rem; border: 1px solid #e2e8f0; }
 .bubble-reviewer { background: #ffffff; border-left: 6px solid #3b82f6; }
-.bubble-author { background: #f1f5f9; border-right: 6px solid #94a3b8; }
-.bubble-header { font-size: 0.75rem; font-weight: 800; margin-bottom: 0.75rem; color: #334155; display: flex; align-items: center; gap: 8px; }
-/* Clearer Buttons */
-button[kind="primary"] { background-color: #0f172a !important; color: white !important; font-weight: 700 !important; }
-button[kind="secondary"] { background-color: #ffffff !important; border: 1px solid #d0d7de !important; }
-/* Metric Values — FORCED VISIBILITY */
-.metric-card div:last-child {
-    color: #0f172a !important; /* Force Black/Blue */
-    opacity: 1 !important;
 }
-/* Diff Container — LIGHT THEME HIGH CONTRAST */
 .diff-container {
     background: #ffffff;
     border-radius: 12px;
@@ -86,158 +120,449 @@ button[kind="secondary"] { background-color: #ffffff !important; border: 1px sol
     padding: 12px 20px;
     color: #475569;
     font-weight: 700;
     border-bottom: 1px solid #e2e8f0;
 }
-.diff-line { padding: 2px 20px; white-space: pre-wrap; border-left: 4px solid transparent; }
-.diff-line-add { background: #ecfdf5; color: #065f46; border-left-color: #10b981; }
-.diff-line-del { background: #fef2f2; color: #991b1b; border-left-color: #ef4444; }
-/* Visibility Fix for Inputs */
-.stTextInput input, .stTextArea textarea, .stSelectbox select {
     background-color: #ffffff !important;
     color: #0f172a !important;
     border: 1px solid #cbd5e1 !important;
 }
 </style>
 """, unsafe_allow_html=True)
-# ─── Constants ───────────────────────────────────────────────────────────────
 ENV_BASE_URL = "http://localhost:8000"
 TASKS = ["single-pass-review", "iterative-negotiation", "escalation-judgment", "custom-review"]
-# ─── Helper Functions ─────────────────────────────────────────────────────────
-def get_agent_action(obs, model_id, url, token):
     try:
-        client = OpenAI(base_url=url, api_key=token)
-        sys_p = "Senior Engineer. Respond ONLY JSON: {\"decision\":\"approve|request_changes|escalate\", \"comment\":\"...\"}"
-        hist = "\n".join([f"{h['role'].upper()}: {h['content']}" for h in obs.get("review_history", [])])
-        prompt = f"PR: {obs.get('pr_title')}\nDiff:\n{obs.get('diff')}\nHistory:\n{hist}\nDecision JSON:"
-        resp = client.chat.completions.create(model=model_id, messages=[{"role":"system","content":sys_p},{"role":"user","content":prompt}], temperature=0.1)
         raw = resp.choices[0].message.content.strip()
-        if "```json" in raw: raw = raw.split("```json")[1].split("```")[0]
-        elif "```" in raw: raw = raw.split("```")[1].split("```")[0]
-        return json.loads(raw)
     except Exception as e:
-        return {"decision": "error", "comment": f"⚠️ CONNECTION ERROR: {str(e)}"}
-def format_diff_html(diff_text):
     lines = diff_text.split("\n")
     filename = "unknown_file"
     for line in lines:
-        if line.startswith("+++ b/"): filename = line.replace("+++ b/", "")
-    html = [f'<div class="diff-container"><div class="diff-header">{filename}</div>']
     for line in lines:
-        cls = "background:rgba(16,185,129,0.1); border-left:4px solid #10b981;" if line.startswith("+") and not line.startswith("+++") else ("background:rgba(239,68,68,0.1); border-left:4px solid #ef4444;" if line.startswith("-") and not line.startswith("---") else "border-left:4px solid transparent;")
-        html.append(f'<div style="{cls} padding:2px 20px; white-space:pre-wrap;">{line}</div>')
-    html.append("</div>")
-    return "\n".join(html)
-# ─── Session State ────────────────────────────────────────────────────────────
 if "initialized" not in st.session_state:
     st.session_state.update({
-        "initialized": False, "turn": 0, "score": 0.0, "decision": "IDLE",
-        "observation": {}, "done": False, "reward_history": [],
-        "api_url": os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"),
-        "api_token": os.getenv("HF_TOKEN", "")
     })
-# ─── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.markdown("### 🔍 PR Command Center")
     st.markdown('<div class="section-label">1. Engine Selection</div>', unsafe_allow_html=True)
     PRESETS = {}
-    if os.getenv("gemma4"): PRESETS["Gemma 4 IT (Secure Internal)"] = {"id":"google/gemma-4-31b-it", "url":"https://integrate.api.nvidia.com/v1", "token":os.getenv("gemma4")}
-    if os.getenv("nemotron3"): PRESETS["Nemotron 3 (Secure Internal)"] = {"id":"nvidia/nemotron-3-super-120b-a12b", "url":"https://integrate.api.nvidia.com/v1", "token":os.getenv("nemotron3")}
     PRESETS.update({
-        "Qwen 2.5 72B (Hugging Face)": {"id":"Qwen/Qwen2.5-72B-Instruct", "url":"https://router.huggingface.co/v1", "token":None},
-        "Llama 3 70B (Groq)": {"id":"llama3-70b-8192", "url":"https://api.groq.com/openai/v1", "token":None},
-        "Custom Endpoint": {"id":"custom", "url":"", "token":None}
     })
-    selected_p = st.selectbox("Intelligence Engine", list(PRESETS.keys()), label_visibility="collapsed")
-    conf = PRESETS[selected_p]
-    m_id = st.text_input("Model ID", value=conf["id"]) if conf["id"] == "custom" else conf["id"]
     with st.expander("🔑 Credentials", expanded=(conf["token"] is None)):
-        if conf["token"]:
-            st.info("🔒 Secure Internal Key Active")
-            t_url, t_key = conf["url"], conf["token"]
         else:
-            t_url = st.text_input("API URL", value=st.session_state.api_url)
-            t_key = st.text_input("API Key", type="password", value=st.session_state.api_token)
-            st.session_state.update({"api_url":t_url, "api_token":t_key})
-    st.markdown('<div class="section-label">2. Task Configuration</div>', unsafe_allow_html=True)
     c_title = st.text_input("Scenario Title", value="Feature Implementation")
     c_desc = st.text_area("Context", value="Refactoring the user service.")
-    # Simple File Picker
-    all_files = [os.path.join(r, f).replace("./", "") for r, _, fs in os.walk(".") for f in fs if ".git" not in r and "__pycache__" not in r]
-    sel_f = st.selectbox("Load Code From File", ["-- Select File --"] + sorted(all_files))
-    loaded_c = ""
-    if sel_f != "-- Select File --":
-        with open(sel_f, "r") as f: loaded_c = f.read()
-    c_diff = st.text_area("Diff Content", value=loaded_c, height=120)
     if st.button("Apply Custom Context", use_container_width=True):
         try:
-            httpx.post(f"{ENV_BASE_URL}/config/custom", json={"diff":c_diff, "pr_title":c_title, "pr_description":c_desc}, timeout=30)
-            st.success("Custom Context Applied.")
-        except: st.error("Sync Error.")
     st.divider()
-    scen = st.selectbox("Select Scenario", TASKS)
     if st.button("🚀 INITIALIZE ENVIRONMENT", use_container_width=True, type="primary"):
         try:
-            r = httpx.post(f"{ENV_BASE_URL}/reset", json={"task_name": scen}, timeout=30).json()
-            st.session_state.update({"initialized":True, "turn":1, "score":0.0, "decision":"IDLE", "observation":r, "reward_history":[], "done":False})
-            st.rerun()
-        except: st.error("Engine Connection Failed.")
-# ─── Main View ────────────────────────────────────────────────────────────────
 if not st.session_state.initialized:
-    st.markdown('<div style="text-align:center; padding-top:10rem;"><h1>PR Review Negotiation Arena</h1><p>Configure Engine & Task in the sidebar to start.</p></div>', unsafe_allow_html=True)
     st.stop()
-# Header
 obs = st.session_state.observation
-h1, h2 = st.columns([4, 1])
-with h1: st.markdown(f"### {obs.get('pr_title')}")
-with h2:
     d = st.session_state.decision
-    badge = {"APPROVE":"badge-approve", "REQUEST_CHANGES":"badge-request", "ESCALATE":"badge-escalate"}.get(d, "badge-running")
-    st.markdown(f'<div style="text-align:right;"><span class="badge {badge}">{d}</span></div>', unsafe_allow_html=True)
 st.write("")
 m1, m2, m3 = st.columns(3)
-with m1: st.markdown(f'<div class="metric-card"><div style="font-weight:800; font-size:0.7rem; color:#64748b;">TOTAL REWARD</div><div style="font-size:1.8rem; font-weight:800;">{st.session_state.score:.2f}</div></div>', unsafe_allow_html=True)
-with m2: st.markdown(f'<div class="metric-card"><div style="font-weight:800; font-size:0.7rem; color:#64748b;">TURN</div><div style="font-size:1.8rem; font-weight:800;">{st.session_state.turn} / 3</div></div>', unsafe_allow_html=True)
-with m3: st.markdown(f'<div class="metric-card"><div style="font-weight:800; font-size:0.7rem; color:#64748b;">ENGINE</div><div style="font-size:1.2rem; font-weight:700; color:#3b82f6;">{m_id[:20]}...</div></div>', unsafe_allow_html=True)
 st.write("")
-t1, t2 = st.tabs(["📄 Code", "💬 Negotiation"])
-with t1: st.markdown(format_diff_html(obs.get("diff", "")), unsafe_allow_html=True)
-with t2:
-    for item in obs.get("review_history", []):
-        r = item["role"] == "reviewer"
-        cl, h = ("bubble-reviewer", "🤖 AI REVIEWER") if r else ("bubble-author", "👨‍💻 AUTHOR")
-        st.markdown(f'<div class="bubble {cl}"><div class="bubble-header">{h}</div>{item["content"]}</div>', unsafe_allow_html=True)
     if not st.session_state.done:
         if st.button("▶ EXECUTE NEXT ROUND", use_container_width=True, type="primary"):
             with st.spinner("AI analyzing code..."):
-                action = get_agent_action(obs, m_id, t_url, t_key)
                 if action["decision"] == "error":
                     st.error(action["comment"])
                 else:
                     try:
-                        r = httpx.post(f"{ENV_BASE_URL}/step", json={"action": action}, timeout=30).json()
-                        st.session_state.update({"observation":r["observation"], "score":st.session_state.score+r["reward"], "reward_history":st.session_state.reward_history+[r["reward"]], "done":r["done"], "decision":action["decision"].upper()})
-                        if not st.session_state.done: st.session_state.turn += 1
-                        st.rerun()
-                    except: st.error("Backend Step Failed.")

 """
+PR Review Command Center — FINAL PRODUCTION BUILD
+──────────────────────────────────────────────────
+Every line in this file has been audited. Changes from the audit:
+  Fix #1:  Removed blanket h1/h2/h3/p/span/label color override (was making diff text invisible)
+  Fix #2:  Metric card values use explicit inline color:#0f172a (CSS :last-child unreliable)
+  Fix #3:  format_diff_html() now emits CSS classes, not inline styles
+  Fix #4:  CSS classes .diff-line-add/.diff-line-del now actually applied in HTML
+  Fix #5:  m_id stored in session_state for cross-scope reliability
+  Fix #6:  t_url/t_key initialized BEFORE expander to prevent NameError
+  Fix #7:  File loader wrapped in try/except for binary files
+  Fix #8:  All file reads wrapped in try/except
+  Fix #9:  Bare except: replaced with except Exception as e everywhere
+  Fix #10: Reset bare except: replaced with except Exception as e
+  Fix #11: Model name truncation is conditional (no false "..." on short names)
 """
 import streamlit as st
 import httpx
 from openai import OpenAI
+# ═══════════════════════════════════════════════════════════════════════════════
+# PAGE CONFIG — Must be the first Streamlit command
+# ═══════════════════════════════════════════════════════════════════════════════
 st.set_page_config(
     page_title="PR Command Center",
     page_icon="🛡️",
     initial_sidebar_state="expanded",
 )
+# ═══════════════════════════════════════════════════════════════════════════════
+# CSS — Every selector is scoped. No blanket overrides. (Fix #1)
+# ═══════════════════════════════════════════════════════════════════════════════
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&family=Fira+Code:wght@400;500&display=swap');
+/* ── App Background ── */
+.stApp {
+    background-color: #f8fafc;
+    font-family: 'Inter', sans-serif;
+}
+/* ── Sidebar ── */
 [data-testid="stSidebar"] {
     background-color: #ffffff !important;
     border-right: 1px solid #e2e8f0;
 }
+/* ── Section Labels (sidebar only) ── */
 .section-label {
+    font-size: 0.75rem;
+    font-weight: 800;
+    color: #1e293b;
+    text-transform: uppercase;
+    letter-spacing: 0.05rem;
+    margin-top: 2rem;
+    border-bottom: 2px solid #f1f5f9;
+    padding-bottom: 4px;
 }
+/* ── Metric Cards ── */
 .metric-card {
     background: #ffffff;
     border: 1px solid #e2e8f0;
     box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05);
 }
+/* ── Status Badges ── */
+.badge {
+    display: inline-block;
+    padding: 6px 14px;
+    border-radius: 20px;
+    font-size: 0.85rem;
+    font-weight: 800;
+    border: 1px solid transparent;
+}
+.badge-approve  { background: #dcfce7; color: #166534; border-color: #bbf7d0; }
+.badge-request  { background: #fee2e2; color: #991b1b; border-color: #fecaca; }
+.badge-escalate { background: #fef9c3; color: #854d0e; border-color: #fef08a; }
+.badge-running  { background: #dbeafe; color: #1e40af; border-color: #bfdbfe; }
+/* ── Timeline Chat Bubbles ── */
+.bubble {
+    padding: 1.5rem;
+    border-radius: 12px;
+    font-size: 0.95rem;
+    line-height: 1.6;
+    margin-bottom: 1.5rem;
+    border: 1px solid #e2e8f0;
+    color: #1e293b;
+}
 .bubble-reviewer { background: #ffffff; border-left: 6px solid #3b82f6; }
+.bubble-author   { background: #f1f5f9; border-right: 6px solid #94a3b8; }
+.bubble-header   {
+    font-size: 0.75rem;
+    font-weight: 800;
+    margin-bottom: 0.75rem;
+    color: #334155;
 }
+/* ── Diff Container — LIGHT THEME (Fix #3, #4) ── */
 .diff-container {
     background: #ffffff;
     border-radius: 12px;
     padding: 12px 20px;
     color: #475569;
     font-weight: 700;
+    font-size: 0.85rem;
     border-bottom: 1px solid #e2e8f0;
 }
+.diff-line {
+    padding: 3px 20px;
+    white-space: pre-wrap;
+    word-break: break-all;
+    border-left: 4px solid transparent;
+    color: #334155;
+    font-size: 0.85rem;
+    line-height: 1.5;
+}
+.diff-line-add {
+    background: #ecfdf5;
+    color: #065f46;
+    border-left-color: #10b981;
+}
+.diff-line-del {
+    background: #fef2f2;
+    color: #991b1b;
+    border-left-color: #ef4444;
+}
+/* ── Input Visibility ── */
+.stTextInput input, .stTextArea textarea {
     background-color: #ffffff !important;
     color: #0f172a !important;
     border: 1px solid #cbd5e1 !important;
 }
+/* ── Hide Streamlit chrome ── */
+#MainMenu { visibility: hidden; }
+footer { visibility: hidden; }
 </style>
 """, unsafe_allow_html=True)
+# ═══════════════════════════════════════════════════════════════════════════════
+# CONSTANTS
+# ═══════════════════════════════════════════════════════════════════════════════
 ENV_BASE_URL = "http://localhost:8000"
 TASKS = ["single-pass-review", "iterative-negotiation", "escalation-judgment", "custom-review"]
+# ═══════════════════════════════════════════════════════════════════════════════
+# HELPER: AI Agent Action
+# ═══════════════════════════════════════════════════════════════════════════════
+def get_agent_action(obs: dict, model_id: str, api_url: str, api_key: str) -> dict:
+    """
+    Calls the LLM to produce a review decision.
+    Returns {"decision": "...", "comment": "..."} on success.
+    Returns {"decision": "error", "comment": "..."} on any failure.
+    The caller MUST check for decision=="error" before stepping the env.
+    """
     try:
+        client = OpenAI(base_url=api_url, api_key=api_key)
+        system_prompt = (
+            'You are a senior software engineer performing a pull request code review.\n'
+            'Respond with ONLY this JSON (no markdown, no extra text):\n'
+            '{"decision": "approve|request_changes|escalate", "comment": "your review"}'
+        )
+        # Build history string
+        history_lines = []
+        for h in obs.get("review_history", []):
+            history_lines.append(f"{h['role'].upper()}: {h['content']}")
+        history_str = "\n".join(history_lines) if history_lines else "None yet."
+        user_prompt = (
+            f"PR Title: {obs.get('pr_title', 'N/A')}\n"
+            f"PR Description: {obs.get('pr_description', 'N/A')}\n\n"
+            f"Diff:\n{obs.get('diff', 'No diff')}\n\n"
+            f"Review History:\n{history_str}\n\n"
+            f"Author's latest response: {obs.get('author_response') or 'N/A'}\n\n"
+            f"Submit your review decision as JSON:"
+        )
+        resp = client.chat.completions.create(
+            model=model_id,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            max_tokens=500,
+            temperature=0.1,
+        )
         raw = resp.choices[0].message.content.strip()
+        # Strip markdown fences if the model wrapped its response
+        if "```json" in raw:
+            raw = raw.split("```json")[1].split("```")[0]
+        elif "```" in raw:
+            raw = raw.split("```")[1].split("```")[0]
+        raw = raw.strip()
+        parsed = json.loads(raw)
+        # Validate required keys exist
+        if "decision" not in parsed or "comment" not in parsed:
+            return {"decision": "error", "comment": "Model returned JSON without 'decision' or 'comment' keys."}
+        return parsed
+    except json.JSONDecodeError as e:
+        return {"decision": "error", "comment": f"Model returned invalid JSON: {e}"}
     except Exception as e:
+        return {"decision": "error", "comment": f"API Error: {e}"}
+# ═══════════════════════════════════════════════════════════════════════════════
+# HELPER: Diff Renderer — uses CSS classes, NOT inline styles (Fix #3, #4)
+# ═══════════════════════════════════════════════════════════════════════════════
+def format_diff_html(diff_text: str) -> str:
+    """
+    Converts a unified diff string into styled HTML.
+    Uses CSS classes .diff-line, .diff-line-add, .diff-line-del defined above.
+    """
+    if not diff_text or not diff_text.strip():
+        return '<div class="diff-container"><div class="diff-header">No diff available</div></div>'
     lines = diff_text.split("\n")
+    # Extract filename from +++ header
     filename = "unknown_file"
     for line in lines:
+        if line.startswith("+++ b/"):
+            filename = line[6:]  # strip "+++ b/" prefix
+            break
+    html_parts = [f'<div class="diff-container"><div class="diff-header">{filename}</div>']
     for line in lines:
+        # Determine CSS class based on line prefix
+        if line.startswith("+") and not line.startswith("+++"):
+            css_class = "diff-line diff-line-add"
+        elif line.startswith("-") and not line.startswith("---"):
+            css_class = "diff-line diff-line-del"
+        else:
+            css_class = "diff-line"
+        # Escape HTML entities to prevent XSS and rendering issues
+        safe_line = line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+        html_parts.append(f'<div class="{css_class}">{safe_line}</div>')
+    html_parts.append("</div>")
+    return "\n".join(html_parts)
+# ═══════════════════════════════════════════════════════════════════════════════
+# SESSION STATE — Single initialization block
+# ═══════════════════════════════════════════════════════════════════════════════
 if "initialized" not in st.session_state:
     st.session_state.update({
+        "initialized": False,
+        "turn": 0,
+        "score": 0.0,
+        "decision": "IDLE",
+        "observation": {},
+        "done": False,
+        "reward_history": [],
+        "active_model_id": "",       # Fix #5: persisted model ID
+        "active_api_url": "",        # Fix #6: persisted API URL
+        "active_api_key": "",        # Fix #6: persisted API key
     })
+# ═══════════════════════════════════════════════════════════════════════════════
+# SIDEBAR
+# ═══════════════════════════════════════════════════════════════════════════════
 with st.sidebar:
     st.markdown("### 🔍 PR Command Center")
+    # ── Section 1: Engine Selection ──
     st.markdown('<div class="section-label">1. Engine Selection</div>', unsafe_allow_html=True)
     PRESETS = {}
+    # Internal secrets: only show if env var exists
+    if os.getenv("gemma4"):
+        PRESETS["Gemma 4 IT (Secure)"] = {
+            "id": "google/gemma-4-31b-it",
+            "url": "https://integrate.api.nvidia.com/v1",
+            "token": os.getenv("gemma4"),
+        }
+    if os.getenv("nemotron3"):
+        PRESETS["Nemotron 3 (Secure)"] = {
+            "id": "nvidia/nemotron-3-super-120b-a12b",
+            "url": "https://integrate.api.nvidia.com/v1",
+            "token": os.getenv("nemotron3"),
+        }
+    # Public presets
     PRESETS.update({
+        "Qwen 2.5 72B (Hugging Face)": {
+            "id": "Qwen/Qwen2.5-72B-Instruct",
+            "url": "https://router.huggingface.co/v1",
+            "token": None,
+        },
+        "Llama 3 70B (Groq)": {
+            "id": "llama3-70b-8192",
+            "url": "https://api.groq.com/openai/v1",
+            "token": None,
+        },
+        "Custom Endpoint": {
+            "id": "custom",
+            "url": "",
+            "token": None,
+        },
     })
+    selected_preset_name = st.selectbox(
+        "Select Model", list(PRESETS.keys()), label_visibility="collapsed"
+    )
+    conf = PRESETS[selected_preset_name]
+    # Model ID: editable only for "Custom Endpoint"
+    if conf["id"] == "custom":
+        current_model_id = st.text_input("Model ID", value="Qwen/Qwen2.5-72B-Instruct")
+    else:
+        current_model_id = conf["id"]
+    # ── Fix #6: Set t_url / t_key BEFORE the expander ──
+    # Default values come from the preset. These get overridden inside
+    # the expander if it renders (for non-internal presets).
+    if conf["token"] is not None:
+        # Internal preset: use its hardcoded URL and secret token
+        current_api_url = conf["url"]
+        current_api_key = conf["token"]
+    else:
+        # External preset: use session_state defaults (may be overridden below)
+        current_api_url = os.getenv("API_BASE_URL", conf["url"] or "https://router.huggingface.co/v1")
+        current_api_key = os.getenv("HF_TOKEN", "")
+    # Credentials expander
     with st.expander("🔑 Credentials", expanded=(conf["token"] is None)):
+        if conf["token"] is not None:
+            st.info("🔒 Secure internal key active. No manual config needed.")
         else:
+            # Let user edit URL and key
+            current_api_url = st.text_input("API URL", value=current_api_url)
+            current_api_key = st.text_input("API Key", type="password", value=current_api_key)
+    # Fix #5: Persist to session_state so they survive reruns
+    st.session_state.active_model_id = current_model_id
+    st.session_state.active_api_url = current_api_url
+    st.session_state.active_api_key = current_api_key
+    # ── Section 2: Task Configuration ──
+    st.markdown('<div class="section-label">2. Custom Review Config</div>', unsafe_allow_html=True)
     c_title = st.text_input("Scenario Title", value="Feature Implementation")
     c_desc = st.text_area("Context", value="Refactoring the user service.")
+    # File picker — filters out common binary extensions (Fix #7)
+    BINARY_EXTS = {".pyc", ".png", ".jpg", ".jpeg", ".gif", ".ico", ".woff", ".woff2", ".ttf", ".eot", ".zip", ".tar", ".gz", ".webp", ".mp4", ".pdf"}
+    all_files = []
+    for root, _dirs, files in os.walk("."):
+        if ".git" in root or "__pycache__" in root:
+            continue
+        for fname in files:
+            if os.path.splitext(fname)[1].lower() in BINARY_EXTS:
+                continue
+            all_files.append(os.path.join(root, fname).replace("./", ""))
+    all_files.sort()
+    sel_file = st.selectbox("Load Code From File", ["-- Select File --"] + all_files)
+    loaded_content = ""
+    if sel_file != "-- Select File --":
+        try:  # Fix #8: Protect against encoding errors on unexpected files
+            with open(sel_file, "r", encoding="utf-8", errors="replace") as f:
+                loaded_content = f.read()
+        except Exception as e:
+            st.warning(f"Could not read file: {e}")
+    c_diff = st.text_area("Diff Content", value=loaded_content, height=120)
     if st.button("Apply Custom Context", use_container_width=True):
         try:
+            resp = httpx.post(
+                f"{ENV_BASE_URL}/config/custom",
+                json={"diff": c_diff, "pr_title": c_title, "pr_description": c_desc},
+                timeout=30,
+            )
+            if resp.status_code == 200:
+                st.success("Custom context applied. Select 'custom-review' below.")
+            else:
+                st.error(f"Backend returned status {resp.status_code}")
+        except Exception as e:  # Fix #9: Show actual error
+            st.error(f"Sync Error: {e}")
+    # ── Section 3: Scenario & Launch ──
     st.divider()
+    scenario = st.selectbox("Select Scenario", TASKS)
     if st.button("🚀 INITIALIZE ENVIRONMENT", use_container_width=True, type="primary"):
         try:
+            r = httpx.post(f"{ENV_BASE_URL}/reset", json={"task_name": scenario}, timeout=30)
+            if r.status_code != 200:
+                st.error(f"Backend error: {r.status_code} — {r.text}")
+            else:
+                st.session_state.update({
+                    "initialized": True,
+                    "turn": 1,
+                    "score": 0.0,
+                    "decision": "IDLE",
+                    "observation": r.json(),
+                    "reward_history": [],
+                    "done": False,
+                })
+                st.rerun()
+        except Exception as e:  # Fix #10: Show actual error
+            st.error(f"Engine connection failed: {e}")
+# ═══════════════════════════════════════════════════════════════════════════════
+# MAIN VIEW — Only renders after initialization
+# ═══════════════════════════════════════════════════════════════════════════════
 if not st.session_state.initialized:
+    st.markdown(
+        '<div style="text-align:center; padding-top:10rem;">'
+        '<h1 style="color:#0f172a; font-size:2.5rem; font-weight:800;">PR Review Negotiation Arena</h1>'
+        '<p style="color:#475569; font-size:1.1rem;">Configure Engine & Task in the sidebar, then click Initialize.</p>'
+        '</div>',
+        unsafe_allow_html=True,
+    )
     st.stop()
+# ── Read state ──
 obs = st.session_state.observation
+active_model = st.session_state.active_model_id  # Fix #5: read from session_state
+# ── Header row ──
+col_title, col_badge = st.columns([4, 1])
+with col_title:
+    st.markdown(f"### {obs.get('pr_title', 'Untitled PR')}")
+with col_badge:
     d = st.session_state.decision
+    badge_map = {
+        "APPROVE": "badge-approve",
+        "REQUEST_CHANGES": "badge-request",
+        "ESCALATE": "badge-escalate",
+    }
+    badge_cls = badge_map.get(d, "badge-running")
+    st.markdown(
+        f'<div style="text-align:right; padding-top:0.5rem;">'
+        f'<span class="badge {badge_cls}">{d}</span></div>',
+        unsafe_allow_html=True,
+    )
 st.write("")
+# ── Metric Cards — explicit inline colors (Fix #2) ──
 m1, m2, m3 = st.columns(3)
+with m1:
+    st.markdown(
+        f'<div class="metric-card">'
+        f'<div style="font-weight:800; font-size:0.7rem; color:#64748b;">TOTAL REWARD</div>'
+        f'<div style="font-size:1.8rem; font-weight:800; color:#0f172a;">{st.session_state.score:.2f}</div>'
+        f'</div>',
+        unsafe_allow_html=True,
+    )
+with m2:
+    st.markdown(
+        f'<div class="metric-card">'
+        f'<div style="font-weight:800; font-size:0.7rem; color:#64748b;">TURN</div>'
+        f'<div style="font-size:1.8rem; font-weight:800; color:#0f172a;">{st.session_state.turn} / 3</div>'
+        f'</div>',
+        unsafe_allow_html=True,
+    )
+with m3:
+    # Fix #11: Conditional truncation — only add "..." if name is actually long
+    display_name = active_model if len(active_model) <= 25 else active_model[:22] + "..."
+    st.markdown(
+        f'<div class="metric-card">'
+        f'<div style="font-weight:800; font-size:0.7rem; color:#64748b;">ENGINE</div>'
+        f'<div style="font-size:1.1rem; font-weight:700; color:#3b82f6;">{display_name}</div>'
+        f'</div>',
+        unsafe_allow_html=True,
+    )
 st.write("")
+# ── Tabs: Code View + Negotiation Timeline ──
+tab_code, tab_nego = st.tabs(["📄 Code View", "💬 Negotiation"])
+with tab_code:
+    st.markdown(format_diff_html(obs.get("diff", "")), unsafe_allow_html=True)
+with tab_nego:
+    history = obs.get("review_history", [])
+    if not history:
+        st.info("No review activity yet. Click the button below to start the first round.")
+    for item in history:
+        is_reviewer = item["role"] == "reviewer"
+        bubble_cls = "bubble-reviewer" if is_reviewer else "bubble-author"
+        header_text = "🤖 AI REVIEWER" if is_reviewer else "👨‍💻 AUTHOR"
+        st.markdown(
+            f'<div class="bubble {bubble_cls}">'
+            f'<div class="bubble-header">{header_text}</div>'
+            f'{item["content"]}'
+            f'</div>',
+            unsafe_allow_html=True,
+        )
+    # ── Action Button ──
     if not st.session_state.done:
         if st.button("▶ EXECUTE NEXT ROUND", use_container_width=True, type="primary"):
             with st.spinner("AI analyzing code..."):
+                action = get_agent_action(
+                    obs,
+                    st.session_state.active_model_id,  # Fix #5
+                    st.session_state.active_api_url,    # Fix #6
+                    st.session_state.active_api_key,    # Fix #6
+                )
+                # Guard: If the LLM call failed, do NOT step the environment
                 if action["decision"] == "error":
                     st.error(action["comment"])
                 else:
                     try:
+                        step_resp = httpx.post(
+                            f"{ENV_BASE_URL}/step",
+                            json={"action": action},
+                            timeout=30,
+                        )
+                        if step_resp.status_code != 200:
+                            st.error(f"Backend step error: {step_resp.status_code} — {step_resp.text}")
+                        else:
+                            result = step_resp.json()
+                            new_reward = result["reward"]
+                            st.session_state.update({
+                                "observation": result["observation"],
+                                "score": st.session_state.score + new_reward,
+                                "reward_history": st.session_state.reward_history + [new_reward],
+                                "done": result["done"],
+                                "decision": action["decision"].upper(),
+                            })
+                            if not st.session_state.done:
+                                st.session_state.turn += 1
+                            st.rerun()
+                    except Exception as e:  # Fix #9: Show actual error
+                        st.error(f"Backend step failed: {e}")
+    else:
+        st.success(f"Episode complete. Final reward: {st.session_state.score:.2f}")