Add MLX remote server support via MLX_SERVER_URL env var
Browse files
app.py
CHANGED
|
@@ -320,6 +320,45 @@ def _run_pipe(prompt: str) -> str:
|
|
| 320 |
return result[0]["generated_text"]
|
| 321 |
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
# ---------------------------------------------------------------------------
|
| 324 |
# Sample contract content
|
| 325 |
# ---------------------------------------------------------------------------
|
|
@@ -389,7 +428,7 @@ def analyze_contract(contract_text: str, question: str) -> tuple[str, str, str,
|
|
| 389 |
return format_label_html("N/A"), "", "", "Please paste a contract above."
|
| 390 |
if not question.strip():
|
| 391 |
return format_label_html("N/A"), "", "", "Please enter a question."
|
| 392 |
-
if _pipe is None:
|
| 393 |
return (
|
| 394 |
format_label_html("N/A"),
|
| 395 |
"Model not loaded",
|
|
@@ -402,9 +441,12 @@ def analyze_contract(contract_text: str, question: str) -> tuple[str, str, str,
|
|
| 402 |
messages = _build_contract_messages(contract_text, question)
|
| 403 |
|
| 404 |
for attempt in range(2):
|
| 405 |
-
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 406 |
try:
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
result = _parse_model_output(raw, question)
|
| 409 |
label_html = format_label_html(result.label.value)
|
| 410 |
answer = result.answer or "(none — clause is absent or not applicable)"
|
|
@@ -470,7 +512,7 @@ def analyse_bank_statement(paste_text: str, pdf_file, csv_file) -> tuple[str, st
|
|
| 470 |
statement_text, error = _get_statement_text(paste_text, pdf_file, csv_file)
|
| 471 |
if error:
|
| 472 |
return f"**Error:** {error}", ""
|
| 473 |
-
if _pipe is None:
|
| 474 |
return (
|
| 475 |
f"**Model not loaded.** Set `HF_MODEL_REPO` in Space secrets. Error: {model_load_error}",
|
| 476 |
statement_text,
|
|
@@ -480,9 +522,12 @@ def analyse_bank_statement(paste_text: str, pdf_file, csv_file) -> tuple[str, st
|
|
| 480 |
messages = _build_bank_messages(statement_text, "SUMMARISE")
|
| 481 |
|
| 482 |
for attempt in range(2):
|
| 483 |
-
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 484 |
try:
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
summary = _parse_summary(raw)
|
| 487 |
lines = ["## Statement Summary", ""]
|
| 488 |
lines.append(f"**Total Credits:** {summary.total_credits or 'N/A'}")
|
|
@@ -515,7 +560,7 @@ def bank_qa(statement_text: str, question: str) -> tuple[str, str, str, str]:
|
|
| 515 |
)
|
| 516 |
if not question.strip():
|
| 517 |
return format_label_html("N/A"), "", "", "Please enter a question."
|
| 518 |
-
if _pipe is None:
|
| 519 |
return (
|
| 520 |
format_label_html("N/A"), "Model not loaded", "",
|
| 521 |
f"Model failed to load: {model_load_error}.",
|
|
@@ -525,9 +570,12 @@ def bank_qa(statement_text: str, question: str) -> tuple[str, str, str, str]:
|
|
| 525 |
messages = _build_bank_messages(statement_text, question)
|
| 526 |
|
| 527 |
for attempt in range(2):
|
| 528 |
-
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 529 |
try:
|
| 530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
result = _parse_model_output(raw, question)
|
| 532 |
label_html = format_label_html(result.label.value)
|
| 533 |
answer = result.answer or "(none — information not found in statement)"
|
|
|
|
| 320 |
return result[0]["generated_text"]
|
| 321 |
|
| 322 |
|
| 323 |
+
# ---------------------------------------------------------------------------
|
| 324 |
+
# MLX Remote Server (Mac Mini via ngrok) - takes priority when MLX_SERVER_URL is set
|
| 325 |
+
# ---------------------------------------------------------------------------
|
| 326 |
+
# Base URL of the optional remote MLX server, read from the MLX_SERVER_URL
# environment variable. A trailing "/" is stripped so endpoint paths can be
# appended directly; an empty string means the remote server is not configured.
_MLX_SERVER_URL = os.environ.get("MLX_SERVER_URL", "").rstrip("/")
# Flipped to True only when the startup probe below gets a response.
_mlx_available = False

if _MLX_SERVER_URL:
    try:
        import urllib.request as _ur

        # One-off reachability probe at import time. The response is opened in
        # a context manager so the underlying connection is closed right away
        # instead of being left open until garbage collection.
        with _ur.urlopen(_MLX_SERVER_URL + "/v1/models", timeout=5):
            pass
        _mlx_available = True
        print("MLX remote server ready: " + _MLX_SERVER_URL)
    except Exception as _e:
        # Deliberately best-effort: any failure only disables the remote path
        # and leaves _mlx_available False.
        print("MLX server unreachable (" + str(_e) + "), falling back to local model.")
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def _run_via_mlx(messages, strict=False):
    """Send chat messages to the remote MLX server and return the reply text.

    Args:
        messages: Sequence of chat message mappings; the last one must have a
            "content" entry when ``strict`` is true.
        strict: When true, ``STRICT_SUFFIX`` is appended to the content of the
            last message. The list and that last message are copied first, so
            the caller's list is not modified.

    Returns:
        The ``content`` field of the first choice in the server's JSON reply.

    Errors raised by the HTTP request, JSON decoding, or an unexpected reply
    shape are not caught here and propagate to the caller.
    """
    import json
    import urllib.request

    chat = list(messages)
    if strict:
        # Shallow-copy the final message so the suffix is added to the copy.
        final_message = dict(chat[-1])
        final_message["content"] += STRICT_SUFFIX
        chat[-1] = final_message

    request_fields = {
        "model": "mlx-community/Qwen3.5-9B-MLX-4bit",
        "messages": chat,
        "max_tokens": 512,
        "temperature": 0.0,
    }
    body = json.dumps(request_fields).encode()

    endpoint = _MLX_SERVER_URL + "/v1/chat/completions"
    request = urllib.request.Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
    )

    # Generation can be slow, hence the long timeout on the request.
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.loads(response.read())

    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
|
| 362 |
# ---------------------------------------------------------------------------
|
| 363 |
# Sample contract content
|
| 364 |
# ---------------------------------------------------------------------------
|
|
|
|
| 428 |
return format_label_html("N/A"), "", "", "Please paste a contract above."
|
| 429 |
if not question.strip():
|
| 430 |
return format_label_html("N/A"), "", "", "Please enter a question."
|
| 431 |
+
if _pipe is None and not _mlx_available:
|
| 432 |
return (
|
| 433 |
format_label_html("N/A"),
|
| 434 |
"Model not loaded",
|
|
|
|
| 441 |
messages = _build_contract_messages(contract_text, question)
|
| 442 |
|
| 443 |
for attempt in range(2):
|
|
|
|
| 444 |
try:
|
| 445 |
+
if _mlx_available:
|
| 446 |
+
raw = _run_via_mlx(messages, strict=(attempt == 1))
|
| 447 |
+
else:
|
| 448 |
+
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 449 |
+
raw = _run_pipe(prompt)
|
| 450 |
result = _parse_model_output(raw, question)
|
| 451 |
label_html = format_label_html(result.label.value)
|
| 452 |
answer = result.answer or "(none — clause is absent or not applicable)"
|
|
|
|
| 512 |
statement_text, error = _get_statement_text(paste_text, pdf_file, csv_file)
|
| 513 |
if error:
|
| 514 |
return f"**Error:** {error}", ""
|
| 515 |
+
if _pipe is None and not _mlx_available:
|
| 516 |
return (
|
| 517 |
f"**Model not loaded.** Set `HF_MODEL_REPO` in Space secrets. Error: {model_load_error}",
|
| 518 |
statement_text,
|
|
|
|
| 522 |
messages = _build_bank_messages(statement_text, "SUMMARISE")
|
| 523 |
|
| 524 |
for attempt in range(2):
|
|
|
|
| 525 |
try:
|
| 526 |
+
if _mlx_available:
|
| 527 |
+
raw = _run_via_mlx(messages, strict=(attempt == 1))
|
| 528 |
+
else:
|
| 529 |
+
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 530 |
+
raw = _run_pipe(prompt)
|
| 531 |
summary = _parse_summary(raw)
|
| 532 |
lines = ["## Statement Summary", ""]
|
| 533 |
lines.append(f"**Total Credits:** {summary.total_credits or 'N/A'}")
|
|
|
|
| 560 |
)
|
| 561 |
if not question.strip():
|
| 562 |
return format_label_html("N/A"), "", "", "Please enter a question."
|
| 563 |
+
if _pipe is None and not _mlx_available:
|
| 564 |
return (
|
| 565 |
format_label_html("N/A"), "Model not loaded", "",
|
| 566 |
f"Model failed to load: {model_load_error}.",
|
|
|
|
| 570 |
messages = _build_bank_messages(statement_text, question)
|
| 571 |
|
| 572 |
for attempt in range(2):
|
|
|
|
| 573 |
try:
|
| 574 |
+
if _mlx_available:
|
| 575 |
+
raw = _run_via_mlx(messages, strict=(attempt == 1))
|
| 576 |
+
else:
|
| 577 |
+
prompt = _apply_template(messages, strict=(attempt == 1))
|
| 578 |
+
raw = _run_pipe(prompt)
|
| 579 |
result = _parse_model_output(raw, question)
|
| 580 |
label_html = format_label_html(result.label.value)
|
| 581 |
answer = result.answer or "(none — information not found in statement)"
|