"""
PR Review Command Center — FINAL PRODUCTION BUILD
──────────────────────────────────────────────────
Every line in this file has been audited. Changes from the audit:
Fix #1: Removed blanket h1/h2/h3/p/span/label color override (was making diff text invisible)
Fix #2: Metric card values use explicit inline color:#0f172a (CSS :last-child unreliable)
Fix #3: format_diff_html() now emits CSS classes, not inline styles
Fix #4: CSS classes .diff-line-add/.diff-line-del now actually applied in HTML
Fix #5: m_id stored in session_state for cross-scope reliability
Fix #6: t_url/t_key initialized BEFORE expander to prevent NameError
Fix #7: File loader wrapped in try/except for binary files
Fix #8: All file reads wrapped in try/except
Fix #9: Bare except: replaced with except Exception as e everywhere
Fix #10: Reset bare except: replaced with except Exception as e
Fix #11: Model name truncation is conditional (no false "..." on short names)
"""
import streamlit as st
import json
import os
import httpx
from openai import OpenAI
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE CONFIG — Must be the first Streamlit command
# ═══════════════════════════════════════════════════════════════════════════════
# Streamlit mandates that set_page_config() be the very first st.* call;
# keeping the settings in one mapping makes that single call easy to audit.
_PAGE_SETTINGS = {
    "page_title": "PR Command Center",
    "page_icon": "🛡️",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# ═══════════════════════════════════════════════════════════════════════════════
# CSS — Every selector is scoped. No blanket overrides. (Fix #1)
# ═══════════════════════════════════════════════════════════════════════════════
# The CSS payload was missing entirely, leaving every class referenced below
# (.diff-line-*, .badge-*, .metric-card, .chat-bubble, …) undefined. All
# selectors are class-scoped — no blanket element overrides (Fix #1).
st.markdown("""
<style>
/* Diff viewer */
.diff-container { background:#0d1117; border-radius:8px; padding:6px 0 10px 0;
                  font-family:monospace; font-size:13px; overflow-x:auto; }
.diff-header { color:#8b949e; padding:6px 12px; border-bottom:1px solid #30363d;
               margin-bottom:4px; font-weight:bold; }
.diff-line { color:#c9d1d9; padding:1px 12px; white-space:pre; }
.diff-line-add { background:rgba(46,160,67,.25); color:#7ee787; }
.diff-line-del { background:rgba(248,81,73,.25); color:#ffa198; }

/* Sidebar section titles */
.sidebar-section-title { font-size:12px; font-weight:700; letter-spacing:.08em;
                         text-transform:uppercase; color:#64748b; margin:12px 0 4px 0; }

/* Decision badge */
.status-badge { display:inline-block; padding:4px 12px; border-radius:999px;
                font-size:12px; font-weight:700; text-align:center; }
.badge-approve { background:#dcfce7; color:#166534; }
.badge-request { background:#fef9c3; color:#854d0e; }
.badge-escalate { background:#fee2e2; color:#991b1b; }
.badge-running { background:#e2e8f0; color:#334155; }

/* Metric cards (value color is set inline per Fix #2) */
.metric-card { background:#ffffff; border:1px solid #e2e8f0; border-radius:10px;
               padding:14px 18px; }
.metric-label { font-size:11px; letter-spacing:.08em; color:#64748b; }
.metric-value { font-size:24px; font-weight:700; }

/* Negotiation bubbles */
.chat-bubble { border-radius:10px; padding:10px 14px; margin-bottom:10px; }
.bubble-reviewer { background:#eff6ff; border:1px solid #bfdbfe; }
.bubble-author { background:#f8fafc; border:1px solid #e2e8f0; }
.bubble-header { font-size:11px; font-weight:700; color:#475569; margin-bottom:4px; }
.bubble-body { color:#0f172a; white-space:pre-wrap; }

/* Landing panel shown before initialization */
.welcome-panel { text-align:center; padding:80px 0; color:#334155; }
</style>
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# CONSTANTS
# ═══════════════════════════════════════════════════════════════════════════════
# Base URL of the review-environment backend (serves /reset, /step, /config/custom).
ENV_BASE_URL = "http://localhost:8000"
# Scenario names accepted by the backend's /reset endpoint.
TASKS = ["single-pass-review", "iterative-negotiation", "escalation-judgment", "custom-review"]
# ═══════════════════════════════════════════════════════════════════════════════
# HELPER: AI Agent Action
# ═══════════════════════════════════════════════════════════════════════════════
def get_agent_action(obs: dict, model_id: str, api_url: str, api_key: str) -> dict:
    """
    Call the LLM to produce a review decision for the current observation.

    Args:
        obs: Environment observation; reads pr_title, pr_description, diff,
             review_history (list of {"role", "content"}) and author_response.
        model_id: Model identifier for the chat-completions API.
        api_url: OpenAI-compatible base URL.
        api_key: API key for that endpoint.

    Returns:
        {"decision": "...", "comment": "..."} on success, or
        {"decision": "error", "comment": "..."} on any failure.
        The caller MUST check for decision == "error" before stepping the env.
    """
    try:
        client = OpenAI(base_url=api_url, api_key=api_key)
        system_prompt = (
            'You are a senior software engineer performing a pull request code review.\n'
            'Respond with ONLY this JSON (no markdown, no extra text):\n'
            '{"decision": "approve|request_changes|escalate", "comment": "your review"}'
        )
        # Flatten prior review turns into "ROLE: content" lines.
        history_lines = [
            f"{h['role'].upper()}: {h['content']}"
            for h in obs.get("review_history", [])
        ]
        history_str = "\n".join(history_lines) if history_lines else "None yet."
        user_prompt = (
            f"PR Title: {obs.get('pr_title', 'N/A')}\n"
            f"PR Description: {obs.get('pr_description', 'N/A')}\n\n"
            f"Diff:\n{obs.get('diff', 'No diff')}\n\n"
            f"Review History:\n{history_str}\n\n"
            f"Author's latest response: {obs.get('author_response') or 'N/A'}\n\n"
            f"Submit your review decision as JSON:"
        )
        resp = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=500,
            temperature=0.1,
        )
        # content can legitimately be None (refusals, tool-only replies);
        # guard before .strip() so the failure surfaces as a JSON error,
        # not an opaque AttributeError caught by the broad handler below.
        raw = (resp.choices[0].message.content or "").strip()
        # Strip markdown fences if the model wrapped its response anyway.
        if "```json" in raw:
            raw = raw.split("```json")[1].split("```")[0]
        elif "```" in raw:
            raw = raw.split("```")[1].split("```")[0]
        raw = raw.strip()
        parsed = json.loads(raw)
        # Validate shape: must be an object carrying both required keys
        # (a bare JSON list/string would otherwise slip through `in` checks).
        if not isinstance(parsed, dict) or "decision" not in parsed or "comment" not in parsed:
            return {"decision": "error", "comment": "Model returned JSON without 'decision' or 'comment' keys."}
        return parsed
    except json.JSONDecodeError as e:
        return {"decision": "error", "comment": f"Model returned invalid JSON: {e}"}
    except Exception as e:
        return {"decision": "error", "comment": f"API Error: {e}"}
# ═══════════════════════════════════════════════════════════════════════════════
# HELPER: Diff Renderer — uses CSS classes, NOT inline styles (Fix #3, #4)
# ═══════════════════════════════════════════════════════════════════════════════
def format_diff_html(diff_text: str) -> str:
    """
    Convert a unified-diff string into styled HTML.

    Emits divs carrying the CSS classes .diff-container, .diff-header,
    .diff-line, .diff-line-add and .diff-line-del (Fix #3, #4) rather than
    inline styles. All line content is HTML-entity escaped, so untrusted
    diff text cannot inject markup through st.markdown(unsafe_allow_html=True).

    Args:
        diff_text: Unified diff ("+++ b/..." headers, +/- line prefixes).

    Returns:
        An HTML fragment; a placeholder block when diff_text is empty/blank.
    """
    def _esc(text: str) -> str:
        # Order matters: '&' must be escaped first or it would re-escape
        # the entities produced by the later replacements.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    if not diff_text or not diff_text.strip():
        return '<div class="diff-container"><div class="diff-line">No diff content available.</div></div>'
    lines = diff_text.split("\n")
    # Extract the target filename from the first "+++ b/..." header.
    filename = "unknown_file"
    for line in lines:
        if line.startswith("+++ b/"):
            filename = line[6:]  # strip "+++ b/" prefix
            break
    html_parts = ['<div class="diff-container">']
    html_parts.append(f'<div class="diff-header">{_esc(filename)}</div>')
    for line in lines:
        # Classify by prefix; file headers (+++/---) stay neutral.
        if line.startswith("+") and not line.startswith("+++"):
            css_class = "diff-line diff-line-add"
        elif line.startswith("-") and not line.startswith("---"):
            css_class = "diff-line diff-line-del"
        else:
            css_class = "diff-line"
        html_parts.append(f'<div class="{css_class}">{_esc(line)}</div>')
    html_parts.append("</div>")
    return "\n".join(html_parts)
# ═══════════════════════════════════════════════════════════════════════════════
# SESSION STATE — Single initialization block
# ═══════════════════════════════════════════════════════════════════════════════
# Seed every session key exactly once per browser session. st.session_state
# survives Streamlit reruns, so this guard prevents clobbering live state.
if "initialized" not in st.session_state:
    st.session_state.update({
        "initialized": False,      # flips True after a successful /reset
        "turn": 0,                 # 1-based round counter once running
        "score": 0.0,              # cumulative reward across steps
        "decision": "IDLE",        # last decision, uppercased, for the badge
        "observation": {},         # latest environment observation payload
        "done": False,             # True when the episode has terminated
        "reward_history": [],      # per-step rewards, in order
        "active_model_id": "",     # Fix #5: persisted model ID
        "active_api_url": "",      # Fix #6: persisted API URL
        "active_api_key": "",      # Fix #6: persisted API key
    })
# ═══════════════════════════════════════════════════════════════════════════════
# SIDEBAR
# ═══════════════════════════════════════════════════════════════════════════════
with st.sidebar:
    st.markdown("### 🔍 PR Command Center")

    # ── Section 1: Engine Selection ──
    st.markdown(
        '<div class="sidebar-section-title">1. Engine Selection</div>',
        unsafe_allow_html=True,
    )
    PRESETS = {}
    # Internal presets only appear when their secret env var is set.
    if os.getenv("gemma4"):
        PRESETS["Gemma 4 IT (Secure)"] = {
            "id": "google/gemma-4-31b-it",
            "url": "https://integrate.api.nvidia.com/v1",
            "token": os.getenv("gemma4"),
        }
    if os.getenv("nemotron3"):
        PRESETS["Nemotron 3 (Secure)"] = {
            "id": "nvidia/nemotron-3-super-120b-a12b",
            "url": "https://integrate.api.nvidia.com/v1",
            "token": os.getenv("nemotron3"),
        }
    # Public presets (token=None means the user supplies credentials).
    PRESETS.update({
        "Qwen 2.5 72B (Hugging Face)": {
            "id": "Qwen/Qwen2.5-72B-Instruct",
            "url": "https://router.huggingface.co/v1",
            "token": None,
        },
        "Llama 3 70B (Groq)": {
            "id": "llama3-70b-8192",
            "url": "https://api.groq.com/openai/v1",
            "token": None,
        },
        "Custom Endpoint": {
            "id": "custom",
            "url": "",
            "token": None,
        },
    })
    selected_preset_name = st.selectbox(
        "Select Model", list(PRESETS.keys()), label_visibility="collapsed"
    )
    conf = PRESETS[selected_preset_name]
    # Model ID is editable only for "Custom Endpoint".
    if conf["id"] == "custom":
        current_model_id = st.text_input("Model ID", value="Qwen/Qwen2.5-72B-Instruct")
    else:
        current_model_id = conf["id"]
    # Fix #6: bind URL/key BEFORE the expander so they exist even when the
    # expander branch does not override them (prevents NameError).
    if conf["token"] is not None:
        # Internal preset: hardcoded URL + secret token.
        current_api_url = conf["url"]
        current_api_key = conf["token"]
    else:
        # External preset: env-var defaults, possibly overridden below.
        current_api_url = os.getenv("API_BASE_URL", conf["url"] or "https://router.huggingface.co/v1")
        current_api_key = os.getenv("HF_TOKEN", "")
    # Credentials expander — only external presets expose editable fields.
    with st.expander("🔑 Credentials", expanded=(conf["token"] is None)):
        if conf["token"] is not None:
            st.info("🔒 Secure internal key active. No manual config needed.")
        else:
            current_api_url = st.text_input("API URL", value=current_api_url)
            current_api_key = st.text_input("API Key", type="password", value=current_api_key)
    # Fix #5: persist to session_state so the values survive reruns.
    st.session_state.active_model_id = current_model_id
    st.session_state.active_api_url = current_api_url
    st.session_state.active_api_key = current_api_key

    # ── Section 2: Task Configuration ──
    st.markdown(
        '<div class="sidebar-section-title">2. Custom Review Config</div>',
        unsafe_allow_html=True,
    )
    c_title = st.text_input("Scenario Title", value="Feature Implementation")
    c_desc = st.text_area("Context", value="Refactoring the user service.")
    # File picker — skip common binary extensions up front (Fix #7).
    BINARY_EXTS = {".pyc", ".png", ".jpg", ".jpeg", ".gif", ".ico", ".woff", ".woff2", ".ttf", ".eot", ".zip", ".tar", ".gz", ".webp", ".mp4", ".pdf"}
    all_files = []
    for root, _dirs, files in os.walk("."):
        if ".git" in root or "__pycache__" in root:
            continue
        for fname in files:
            if os.path.splitext(fname)[1].lower() in BINARY_EXTS:
                continue
            # relpath normalizes away the leading "./" without the risk of
            # a blind str.replace mangling "./" elsewhere in the path.
            all_files.append(os.path.relpath(os.path.join(root, fname)))
    all_files.sort()
    sel_file = st.selectbox("Load Code From File", ["-- Select File --"] + all_files)
    loaded_content = ""
    if sel_file != "-- Select File --":
        try:  # Fix #8: protect against encoding errors on unexpected files
            with open(sel_file, "r", encoding="utf-8", errors="replace") as f:
                loaded_content = f.read()
        except Exception as e:
            st.warning(f"Could not read file: {e}")
    c_diff = st.text_area("Diff Content", value=loaded_content, height=120)
    if st.button("Apply Custom Context", use_container_width=True):
        try:
            resp = httpx.post(
                f"{ENV_BASE_URL}/config/custom",
                json={"diff": c_diff, "pr_title": c_title, "pr_description": c_desc},
                timeout=30,
            )
            if resp.status_code == 200:
                st.success("Custom context applied. Select 'custom-review' below.")
            else:
                st.error(f"Backend returned status {resp.status_code}")
        except Exception as e:  # Fix #9: show the actual error
            st.error(f"Sync Error: {e}")

    # ── Section 3: Scenario & Launch ──
    st.divider()
    scenario = st.selectbox("Select Scenario", TASKS)
    if st.button("🚀 INITIALIZE ENVIRONMENT", use_container_width=True, type="primary"):
        try:
            r = httpx.post(f"{ENV_BASE_URL}/reset", json={"task_name": scenario}, timeout=30)
            if r.status_code != 200:
                st.error(f"Backend error: {r.status_code} — {r.text}")
            else:
                st.session_state.update({
                    "initialized": True,
                    "turn": 1,
                    "score": 0.0,
                    "decision": "IDLE",
                    "observation": r.json(),
                    "reward_history": [],
                    "done": False,
                })
                st.rerun()
        except Exception as e:  # Fix #10: show the actual error
            st.error(f"Engine connection failed: {e}")
# ═══════════════════════════════════════════════════════════════════════════════
# MAIN VIEW — Only renders after initialization
# ═══════════════════════════════════════════════════════════════════════════════
# Landing view: until the environment is initialized, show instructions and
# halt the script so none of the run-time widgets below render.
if not st.session_state.initialized:
    st.markdown(
        '<div class="welcome-panel">'
        '<h2>PR Review Negotiation Arena</h2>'
        '<p>Configure Engine &amp; Task in the sidebar, then click Initialize.</p>'
        '</div>',
        unsafe_allow_html=True,
    )
    st.stop()
# ── Read state ──
obs = st.session_state.observation
active_model = st.session_state.active_model_id  # Fix #5: read from session_state
# ── Header row: PR title on the left, decision badge on the right ──
col_title, col_badge = st.columns([4, 1])
with col_title:
    st.markdown(f"### {obs.get('pr_title', 'Untitled PR')}")
with col_badge:
    d = st.session_state.decision
    # Map decisions to badge CSS classes; anything else renders as "running".
    badge_map = {
        "APPROVE": "badge-approve",
        "REQUEST_CHANGES": "badge-request",
        "ESCALATE": "badge-escalate",
    }
    badge_cls = badge_map.get(d, "badge-running")
    st.markdown(
        f'<div class="status-badge {badge_cls}">{d}</div>',
        unsafe_allow_html=True,
    )
st.write("")
# ── Metric Cards — value color set inline because themed CSS cascades
# proved unreliable for :last-child selectors (Fix #2) ──
m1, m2, m3 = st.columns(3)
with m1:
    st.markdown(
        f'<div class="metric-card">'
        f'<div class="metric-label">TOTAL REWARD</div>'
        f'<div class="metric-value" style="color:#0f172a;">{st.session_state.score:.2f}</div>'
        f'</div>',
        unsafe_allow_html=True,
    )
with m2:
    st.markdown(
        f'<div class="metric-card">'
        f'<div class="metric-label">TURN</div>'
        f'<div class="metric-value" style="color:#0f172a;">{st.session_state.turn} / 3</div>'
        f'</div>',
        unsafe_allow_html=True,
    )
with m3:
    # Fix #11: conditional truncation — only append "..." when the model
    # name is actually longer than the 25-char budget.
    display_name = active_model if len(active_model) <= 25 else active_model[:22] + "..."
    st.markdown(
        f'<div class="metric-card">'
        f'<div class="metric-label">ENGINE</div>'
        f'<div class="metric-value" style="color:#0f172a;">{display_name}</div>'
        f'</div>',
        unsafe_allow_html=True,
    )
st.write("")
# ── Tabs: Code View + Negotiation Timeline ──
tab_code, tab_nego = st.tabs(["📄 Code View", "💬 Negotiation"])
with tab_code:
    # format_diff_html escapes its input, so unsafe_allow_html is safe here.
    st.markdown(format_diff_html(obs.get("diff", "")), unsafe_allow_html=True)
with tab_nego:
    history = obs.get("review_history", [])
    if not history:
        st.info("No review activity yet. Click the button below to start the first round.")
    for item in history:
        is_reviewer = item["role"] == "reviewer"
        bubble_cls = "bubble-reviewer" if is_reviewer else "bubble-author"
        header_text = "🤖 AI REVIEWER" if is_reviewer else "👨‍💻 AUTHOR"
        # Escape the message body: model/author text is untrusted and is
        # rendered with unsafe_allow_html, so raw <, >, & could inject markup.
        safe_content = (
            item["content"].replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        )
        st.markdown(
            f'<div class="chat-bubble {bubble_cls}">'
            f'<div class="bubble-header">{header_text}</div>'
            f'<div class="bubble-body">{safe_content}</div>'
            f'</div>',
            unsafe_allow_html=True,
        )
# ── Action Button ──
if not st.session_state.done:
    if st.button("▶ EXECUTE NEXT ROUND", use_container_width=True, type="primary"):
        with st.spinner("AI analyzing code..."):
            # Connection details come from session_state (Fix #5/#6) so a
            # rerun between sidebar render and this click cannot lose them.
            action = get_agent_action(
                obs,
                st.session_state.active_model_id,  # Fix #5
                st.session_state.active_api_url,  # Fix #6
                st.session_state.active_api_key,  # Fix #6
            )
        # Guard: If the LLM call failed, do NOT step the environment
        if action["decision"] == "error":
            st.error(action["comment"])
        else:
            try:
                step_resp = httpx.post(
                    f"{ENV_BASE_URL}/step",
                    json={"action": action},
                    timeout=30,
                )
                if step_resp.status_code != 200:
                    st.error(f"Backend step error: {step_resp.status_code} — {step_resp.text}")
                else:
                    result = step_resp.json()
                    new_reward = result["reward"]
                    # Accumulate score/history locally; the backend only
                    # reports the per-step reward and termination flag.
                    st.session_state.update({
                        "observation": result["observation"],
                        "score": st.session_state.score + new_reward,
                        "reward_history": st.session_state.reward_history + [new_reward],
                        "done": result["done"],
                        "decision": action["decision"].upper(),
                    })
                    # Only advance the turn counter while the episode is live.
                    if not st.session_state.done:
                        st.session_state.turn += 1
                    st.rerun()
            except Exception as e:  # Fix #9: Show actual error
                st.error(f"Backend step failed: {e}")
else:
    st.success(f"Episode complete. Final reward: {st.session_state.score:.2f}")