Spaces:
Sleeping
Sleeping
Add CanLex Path B web app: Gradio thin client
Browse files- .gitignore +2 -0
- Dockerfile +25 -0
- README.md +58 -6
- app.py +300 -0
- requirements.txt +5 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CanLex Web (Path B) -- the private Gradio front-end.
#
# A thin client: it carries no corpus and loads no models. It calls the deployed
# CanLex MCP server for retrieval and Google Gemini for answer composition.
# Builds on Hugging Face Spaces (sdk: docker) or with plain Docker.
FROM python:3.12-slim

# Run as a non-root user (UID 1000) -- required by Hugging Face Spaces.
# /app serves as that user's home directory and as the working directory.
RUN useradd --create-home --home-dir /app --uid 1000 app
WORKDIR /app

# Python dependencies first, so this layer caches across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# The application is a single file; ownership goes to the runtime user.
COPY --chown=app:app app.py .

# Everything from here on, including CMD, runs as the unprivileged user.
USER app
# PORT is read by app.py (which falls back to 7860) and matches EXPOSE below.
ENV HOME=/app \
    PORT=7860 \
    PYTHONUNBUFFERED=1 \
    GRADIO_ANALYTICS_ENABLED=False

EXPOSE 7860
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,63 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 🐠
|
| 4 |
-
colorFrom: indigo
|
| 5 |
-
colorTo: yellow
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
-
short_description: Private web front-end for CanLex
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: CanLex Web
|
|
|
|
|
|
|
|
|
|
| 3 |
sdk: docker
|
| 4 |
+
app_port: 7860
|
| 5 |
pinned: false
|
|
|
|
| 6 |
---
|
| 7 |
|
| 8 |
+
# CanLex Web (Path B)
|
| 9 |
+
|
| 10 |
+
A private web front-end for **CanLex**, so people without an MCP-capable AI
|
| 11 |
+
client can still ask Canadian legal-research questions.
|
| 12 |
+
|
| 13 |
+
## How it works -- thin client
|
| 14 |
+
|
| 15 |
+
This app holds **no copy of the legal corpus**. For each question it:
|
| 16 |
+
|
| 17 |
+
1. calls the deployed **CanLex MCP server** (`canlex_search_legislation`) to
|
| 18 |
+
retrieve the cited source passages;
|
| 19 |
+
2. sends those passages and the question to **Google Gemini Flash**, which
|
| 20 |
+
composes a grounded, cited answer following CanLex's own answering
|
| 21 |
+
instructions;
|
| 22 |
+
3. displays the answer, with the retrieved passages shown for review.
|
| 23 |
+
|
| 24 |
+
Because retrieval stays on the MCP server, a corpus or retrieval change is
|
| 25 |
+
deployed once (to the MCP Space) and both the MCP connector and this website
|
| 26 |
+
pick it up. Only UI or prompt changes redeploy this Space.
|
| 27 |
+
|
| 28 |
+
## Required Space secrets
|
| 29 |
+
|
| 30 |
+
Set these under **Settings -> Variables and secrets**:
|
| 31 |
+
|
| 32 |
+
| Name | Kind | Purpose |
|
| 33 |
+
|------|------|---------|
|
| 34 |
+
| `GEMINI_API_KEY` | secret | Free Gemini key from Google AI Studio (https://aistudio.google.com/apikey). |
|
| 35 |
+
| `CANLEX_WEB_AUTH` | secret | Login credentials, one `username:password` per line. |
|
| 36 |
+
|
| 37 |
+
Optional overrides:
|
| 38 |
+
|
| 39 |
+
| Name | Default |
|
| 40 |
+
|------|---------|
|
| 41 |
+
| `CANLEX_MCP_URL` | `https://beemer0-canlex.hf.space/mcp` |
|
| 42 |
+
| `CANLEX_GEMINI_MODEL` | `gemini-2.5-flash` |
|
| 43 |
+
|
| 44 |
+
If `CANLEX_WEB_AUTH` is unset or contains no valid `username:password` line, the app falls back to an insecure default login
|
| 45 |
+
(`canlex` / `canlex`) and logs a warning -- set the secret before real use.
|
| 46 |
+
|
| 47 |
+
## Make the Space private
|
| 48 |
+
|
| 49 |
+
Under **Settings -> Change Space visibility**, set the Space to **Private**.
|
| 50 |
+
The app then has two layers of protection: Hugging Face gates who can open the
|
| 51 |
+
page at all, and Gradio's username/password gates who can use it.
|
| 52 |
+
|
| 53 |
+
## Run locally
|
| 54 |
+
|
| 55 |
+
```
|
| 56 |
+
pip install -r requirements.txt
|
| 57 |
+
# PowerShell:
|
| 58 |
+
$env:GEMINI_API_KEY = "your-key"
|
| 59 |
+
$env:CANLEX_WEB_AUTH = "me:secret"
|
| 60 |
+
python app.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Then open http://localhost:7860.
|
app.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""CanLex Web (Path B) -- a private web front-end for CanLex legal research.
|
| 3 |
+
|
| 4 |
+
A thin client: it holds no copy of the legal corpus. For each question it
|
| 5 |
+
1. calls the CanLex MCP server (tool: canlex_search_legislation) to retrieve
|
| 6 |
+
the cited source passages;
|
| 7 |
+
2. sends those passages and the question to Google Gemini Flash, which
|
| 8 |
+
composes a grounded, cited answer following CanLex's answering instructions;
|
| 9 |
+
3. displays the answer, with the retrieved passages available for review.
|
| 10 |
+
|
| 11 |
+
All configuration comes from environment variables, set as Hugging Face Space
|
| 12 |
+
secrets. Run locally with: python app.py
|
| 13 |
+
"""
|
| 14 |
+
import asyncio
|
| 15 |
+
import json
|
| 16 |
+
import os
|
| 17 |
+
import sys
|
| 18 |
+
import urllib.error
|
| 19 |
+
import urllib.request
|
| 20 |
+
from datetime import timedelta
|
| 21 |
+
|
| 22 |
+
import gradio as gr
|
| 23 |
+
from mcp import ClientSession
|
| 24 |
+
from mcp.client.streamable_http import streamablehttp_client
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# --- Configuration (Hugging Face Space secrets / environment variables) -------
|
| 28 |
+
|
| 29 |
+
# The deployed CanLex MCP server. Retrieval logic and the corpus live there; this
|
| 30 |
+
# app never carries its own copy. Override only to point at a different server.
|
| 31 |
+
def _env_or_default(name: str, default: str) -> str:
    """Return env var *name* stripped of whitespace, or *default* if unset or blank.

    A variable that exists but is empty (easy to create by accident in a
    settings form) would otherwise yield "" and silently break the URLs
    built from it.
    """
    return os.environ.get(name, "").strip() or default


MCP_URL = _env_or_default("CANLEX_MCP_URL", "https://beemer0-canlex.hf.space/mcp")

# Google Gemini -- the free-tier key is supplied as the GEMINI_API_KEY secret.
GEMINI_MODEL = _env_or_default("CANLEX_GEMINI_MODEL", "gemini-2.5-flash")
GEMINI_ENDPOINT = ("https://generativelanguage.googleapis.com/v1beta/models/"
                   f"{GEMINI_MODEL}:generateContent")

SEARCH_TOOL = "canlex_search_legislation"   # MCP tool invoked for every question
DEFAULT_TOP_K = 6            # initial value of the "number of passages" slider
MAX_OUTPUT_TOKENS = 8192     # generous -- covers Gemini 2.5 thinking plus the answer
REQUEST_TIMEOUT = 120        # seconds, applied to both the MCP and the Gemini calls
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _load_auth() -> list[tuple[str, str]]:
    """Parse CANLEX_WEB_AUTH into the (username, password) pairs used for login.

    The variable holds one 'username:password' entry per line. Only the first
    colon separates the two fields, so passwords may contain colons. Both
    fields are stripped of surrounding whitespace and blank lines are ignored.

    Lines that are not blank but lack a colon, a username or a password are
    skipped and counted in a warning on stderr. Their content is never
    echoed, because it may be a mistyped secret.

    Returns:
        The parsed credential pairs, or the insecure default
        [("canlex", "canlex")] (with a warning on stderr) when no valid
        entry exists.
    """
    raw = os.environ.get("CANLEX_WEB_AUTH", "")
    creds: list[tuple[str, str]] = []
    malformed = 0
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        user, sep, password = line.partition(":")
        user, password = user.strip(), password.strip()
        if not (sep and user and password):
            malformed += 1
            continue
        creds.append((user, password))

    if malformed:
        print(f"WARNING: ignored {malformed} malformed CANLEX_WEB_AUTH line(s); "
              "each line must be 'username:password'.",
              file=sys.stderr)
    if not creds:
        # Distinguish "never configured" from "configured but unusable" so the
        # operator is not told the secret is missing when it is merely wrong.
        state = ("is set but contains no valid 'username:password' line"
                 if raw.strip() else "is not set")
        print(f"WARNING: CANLEX_WEB_AUTH {state}; using the insecure default "
              "login 'canlex' / 'canlex'. Set CANLEX_WEB_AUTH as a Space secret "
              "(one 'username:password' per line) before sharing this app.",
              file=sys.stderr)
        creds = [("canlex", "canlex")]
    return creds
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Login pairs resolved once at import time; passed to demo.launch(auth=...).
AUTH = _load_auth()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# --- Prompting ----------------------------------------------------------------
|
| 68 |
+
|
| 69 |
+
# Light web framing only. The substantive answering guidance is CanLex's own
|
| 70 |
+
# "ANSWERING INSTRUCTIONS", returned inline in the retrieved material below; this
|
| 71 |
+
# system instruction just sets the web role and disables the tool-calling steps
|
| 72 |
+
# that guidance assumes (this front-end cannot call follow-up MCP tools).
|
| 73 |
+
SYSTEM_INSTRUCTION = """\
|
| 74 |
+
You are CanLex Web, a Canadian legal-research assistant. A member of the public \
|
| 75 |
+
has asked the legal question shown below through a web form. Compose one clear, \
|
| 76 |
+
well-organised answer, grounded entirely in the retrieved source material that \
|
| 77 |
+
follows the question.
|
| 78 |
+
|
| 79 |
+
The retrieved material opens with a block headed "ANSWERING INSTRUCTIONS" from \
|
| 80 |
+
the CanLex retrieval system. Follow those instructions, with these adjustments \
|
| 81 |
+
for this web setting:
|
| 82 |
+
|
| 83 |
+
- You have no tools and cannot retrieve anything further. Disregard any \
|
| 84 |
+
instruction to call canlex_get_section, canlex_search_legislation or \
|
| 85 |
+
canlex_case. Work only with the passages provided. If the question depends on a \
|
| 86 |
+
provision, regulation or decision that is referred to but not reproduced below, \
|
| 87 |
+
say so plainly and name what the reader should consult -- never guess its \
|
| 88 |
+
contents.
|
| 89 |
+
- Write for a reader who cannot see the raw passages: quote the key operative \
|
| 90 |
+
words and give every citation in full.
|
| 91 |
+
- Use plain Markdown -- short paragraphs, with headings or lists where they aid \
|
| 92 |
+
clarity.
|
| 93 |
+
- If the retrieved material does not actually answer the question, say so \
|
| 94 |
+
directly rather than stretching it to fit.
|
| 95 |
+
- Close with a one-line reminder that this is legal information, not legal \
|
| 96 |
+
advice, and is current only to the dates stated in the sources."""
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# --- Retrieval: call the CanLex MCP server ------------------------------------
|
| 100 |
+
|
| 101 |
+
async def _mcp_search(query: str, top_k: int) -> str:
    """Call canlex_search_legislation on the remote MCP server; return its text.

    Opens a streamable-HTTP connection to MCP_URL, initialises a client
    session, invokes SEARCH_TOOL once, and joins the text blocks of the
    result with newlines.

    Args:
        query: The user's question, forwarded verbatim as the search query.
        top_k: Number of passages to request; coerced to int because the UI
            slider may deliver a float.

    Returns:
        The concatenated, stripped text content of the tool result (possibly
        an empty string).

    Raises:
        RuntimeError: If the tool result is flagged as an error. The message
            is the tool's own text when it supplied any.
    """
    async with streamablehttp_client(
        MCP_URL,
        timeout=timedelta(seconds=REQUEST_TIMEOUT),
        sse_read_timeout=timedelta(seconds=REQUEST_TIMEOUT),
    ) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # The tool wraps its arguments in a single 'params' object (the
            # server defines the tool as canlex_search_legislation(params: ...)).
            result = await session.call_tool(
                SEARCH_TOOL,
                {"params": {"query": query, "top_k": int(top_k)}},
            )

    # Inspect the result only after both context managers have closed.
    # NOTE(review): the MCP client appears to be built on anyio task groups,
    # which re-raise an exception escaping the `async with` body wrapped in an
    # ExceptionGroup; raising out here keeps the RuntimeError and its message
    # intact for the caller. Confirm against the installed mcp/anyio versions.
    text = "\n".join(
        block.text for block in result.content
        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
    ).strip()
    if result.isError:
        raise RuntimeError(text or "the retrieval service returned an error.")
    return text
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def retrieve(query: str, top_k: int) -> str:
    """Blocking entry point for retrieval.

    Builds the _mcp_search coroutine and drives it to completion on a fresh
    event loop via asyncio.run, returning whatever text the search produced.
    Exceptions raised by the search propagate to the caller.
    """
    search = _mcp_search(query, top_k)
    return asyncio.run(search)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# --- Answer composition: call Google Gemini -----------------------------------
|
| 131 |
+
|
| 132 |
+
def _gemini_request_body(question: str, retrieved: str) -> dict:
    """Build the JSON payload for Gemini's generateContent call."""
    user_prompt = (
        "Answer this question for the user, using only the retrieved CanLex "
        "material that follows it.\n\n"
        f"QUESTION:\n{question}\n\n"
        "RETRIEVED CANLEX MATERIAL (it begins with CanLex's own answering "
        f"instructions):\n\n{retrieved}"
    )
    return {
        "systemInstruction": {"parts": [{"text": SYSTEM_INSTRUCTION}]},
        "contents": [{"role": "user", "parts": [{"text": user_prompt}]}],
        "generationConfig": {
            "temperature": 0.2,
            "maxOutputTokens": MAX_OUTPUT_TOKENS,
        },
        # Legal research routinely discusses crime, weapons and the like; relax
        # the filters so legitimate legal text is not spuriously blocked.
        "safetySettings": [
            {"category": c, "threshold": "BLOCK_ONLY_HIGH"}
            for c in ("HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
                      "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                      "HARM_CATEGORY_DANGEROUS_CONTENT")
        ],
    }


def _extract_gemini_answer(payload) -> str:
    """Pull the answer text out of a decoded generateContent response."""
    if not isinstance(payload, dict):
        raise RuntimeError("Gemini returned an unexpected response shape.")
    candidates = payload.get("candidates") or []
    if not candidates:
        raise RuntimeError(
            "Gemini returned no answer. promptFeedback: "
            f"{payload.get('promptFeedback', {})}")
    candidate = candidates[0]
    # `content` may be absent or null, so normalise both cases to "no parts".
    parts = (candidate.get("content") or {}).get("parts") or []
    answer = "".join(
        part["text"] for part in parts
        # Skip non-text parts and the model's "thought" parts.
        if isinstance(part, dict) and "text" in part and not part.get("thought")
    ).strip()
    if not answer:
        reason = candidate.get("finishReason", "unknown")
        raise RuntimeError(
            f"Gemini produced an empty answer (finishReason: {reason}). "
            "If this is MAX_TOKENS, raise MAX_OUTPUT_TOKENS in app.py.")
    return answer


def compose_answer(question: str, retrieved: str) -> str:
    """Send the question and retrieved passages to Gemini; return the answer.

    Args:
        question: The user's question.
        retrieved: The text returned by the CanLex retrieval call.

    Returns:
        The model's answer text, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If GEMINI_API_KEY is missing, the HTTP call fails or
            times out, the response is not valid JSON, or the response
            contains no usable answer text.
    """
    api_key = os.environ.get("GEMINI_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "GEMINI_API_KEY is not set. Add it as a Space secret -- create a "
            "free key at Google AI Studio (https://aistudio.google.com/apikey).")

    request = urllib.request.Request(
        GEMINI_ENDPOINT,
        data=json.dumps(_gemini_request_body(question, retrieved)).encode("utf-8"),
        headers={"Content-Type": "application/json", "x-goog-api-key": api_key},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", "replace")[:600]
        raise RuntimeError(f"Gemini API returned HTTP {exc.code}: {detail}") \
            from None
    except urllib.error.URLError as exc:
        raise RuntimeError(f"Could not reach the Gemini API: {exc.reason}") \
            from None
    except OSError as exc:
        # Timeouts or resets while waiting for or reading the response are
        # raised as plain OSError/TimeoutError, not wrapped in URLError.
        raise RuntimeError(f"Could not reach the Gemini API: {exc}") from None
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        raise RuntimeError(
            "Gemini API returned a response that is not valid JSON.") from None

    return _extract_gemini_answer(payload)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# --- Gradio handler -----------------------------------------------------------
|
| 200 |
+
|
| 201 |
+
# Leading text that answer() checks the retrieval output for, to recognise a
# no-results reply and skip the Gemini call.
NO_RESULTS_PREFIX = "No results matched"
# Markdown shown in the answer pane at start-up and restored by the Clear button.
ANSWER_PLACEHOLDER = "*Your answer will appear here.*"
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _describe_error(exc: BaseException) -> str:
    """Format *exc* as 'TypeName: message', unwrapping single-member groups.

    Exception groups expose their members on an `exceptions` attribute. When
    a group holds exactly one member, that member is the useful diagnosis,
    while the group's own message ("unhandled errors in a TaskGroup") is not.
    Duck typing is used so this also works with back-ported group classes.
    """
    members = getattr(exc, "exceptions", None)
    while isinstance(members, (tuple, list)) and len(members) == 1:
        exc = members[0]
        members = getattr(exc, "exceptions", None)
    return f"{type(exc).__name__}: {exc}"


def answer(question: str, top_k: int):
    """Generator: retrieve passages, compose an answer, stream status updates.

    Each yielded value is an (answer_markdown, sources_markdown) pair for the
    two output panes. Failures are reported in the answer pane rather than
    raised, so the UI always receives a final message.

    Args:
        question: Raw text from the question box; None or blank is rejected.
        top_k: Number of passages to request from the retrieval service.
    """
    question = (question or "").strip()
    if not question:
        yield "Please enter a legal question above.", ""
        return

    yield "_Retrieving source passages from CanLex..._", ""
    try:
        retrieved = retrieve(question, top_k)
    except Exception as exc:
        yield ("**Could not reach the CanLex retrieval service.**\n\n"
               f"`{_describe_error(exc)}`\n\n"
               "The service may be waking from sleep -- wait a moment and try "
               "again."), ""
        return

    if not retrieved:
        yield "The retrieval service returned nothing. Please try again.", ""
        return
    if retrieved.startswith(NO_RESULTS_PREFIX):
        yield f"**No matching CanLex material was found.**\n\n{retrieved}", ""
        return

    # From here on the passages stay visible even if composition fails.
    yield "_Composing a grounded answer with Gemini..._", retrieved
    try:
        composed = compose_answer(question, retrieved)
    except Exception as exc:
        yield (f"**The answer could not be composed.**\n\n"
               f"`{_describe_error(exc)}`"), retrieved
        return

    yield composed, retrieved
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
# --- UI -----------------------------------------------------------------------
|
| 241 |
+
|
| 242 |
+
# Markdown rendered at the top of the page by gr.Markdown(INTRO).
INTRO = """\
# CanLex -- Canadian Legal Research

Ask a question about Canadian **border, customs, immigration, criminal, drug,
labour or related federal law**. CanLex finds the governing statutory
provisions, CBSA guidance, collective-agreement terms and leading court
decisions, then composes an answer that cites them.

Legal information, not legal advice -- always verify against the primary sources.
"""

# Sample questions offered through gr.Examples; selecting one fills the
# question box.
EXAMPLE_QUESTIONS = [
    "What are the detention review timelines for a permanent resident?",
    "When is a foreign national inadmissible for serious criminality?",
    "What overtime provisions apply to FB-group Border Services officers?",
    "Can the CBSA seize goods for an undervalued customs declaration?",
]
|
| 259 |
+
|
| 260 |
+
with gr.Blocks(title="CanLex", analytics_enabled=False) as demo:
    # Components are created in display order: intro, question box, search
    # options, buttons, examples, answer pane, sources pane.
    gr.Markdown(INTRO)

    question_hint = ("e.g. What are the detention review timelines for a "
                     "permanent resident?")
    question = gr.Textbox(label="Your legal question",
                          placeholder=question_hint,
                          lines=3)

    with gr.Accordion("Search options", open=False):
        top_k = gr.Slider(
            label="Number of source passages to retrieve",
            minimum=3,
            maximum=12,
            step=1,
            value=DEFAULT_TOP_K,
        )

    with gr.Row():
        submit = gr.Button("Ask CanLex", variant="primary")
        clear = gr.Button("Clear")

    gr.Examples(examples=EXAMPLE_QUESTIONS, inputs=question, label="Examples")

    answer_md = gr.Markdown(value=ANSWER_PLACEHOLDER)
    with gr.Accordion("Retrieved source passages", open=False):
        sources_md = gr.Markdown()

    def _reset_form():
        """Blank the question and sources; put the placeholder back in the answer pane."""
        return "", ANSWER_PLACEHOLDER, ""

    # The button and the textbox's submit event both run the same handler
    # with the same inputs and outputs.
    for register in (submit.click, question.submit):
        register(answer, [question, top_k], [answer_md, sources_md])
    clear.click(fn=_reset_form, inputs=None,
                outputs=[question, answer_md, sources_md])
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
if __name__ == "__main__":
    # Start-up banner on stderr: which backends are in use and how many
    # logins were loaded (never the credentials themselves).
    banner = (f"CanLex Web starting -- MCP: {MCP_URL}; model: {GEMINI_MODEL}; "
              f"{len(AUTH)} login(s) configured.")
    print(banner, file=sys.stderr)

    demo.queue()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", "7860")),
        "auth": AUTH,
        "auth_message": "Sign in to use CanLex.",
        "show_api": False,
    }
    demo.launch(**launch_options)
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CanLex Web (Path B) -- thin-client dependencies.
# Upper bounds stop an image rebuild from silently pulling a new major release.
# NOTE(review): app.py uses launch(show_api=...) and mcp's streamablehttp_client
# helper -- re-test against the new release before lifting either cap.
gradio>=5.0,<6      # web UI with built-in username/password auth
mcp>=1.9,<2         # streamable-HTTP client for the CanLex MCP server
# Google Gemini is called through its REST API using only the Python standard
# library (urllib); no LLM SDK dependency is needed.
|