Upload inference.py
inference.py CHANGED (+9 -5)
@@ -11,6 +11,7 @@ Or run with a file of intents:
 
 import argparse
 import json
+import os
 import re
 import sys
 
@@ -31,10 +32,16 @@ TOP_P = 0.95
 
 def load_model(adapter_path: str, base_model: str):
     """Load base model + LoRA adapters."""
+    adapter_path = os.path.abspath(adapter_path)
+    if not os.path.isdir(adapter_path):
+        print(f"ERROR: Adapter path not found: {adapter_path}")
+        print("Run train.py first to generate adapters.")
+        sys.exit(1)
+
     print(f"Loading base model: {base_model}")
     model = AutoModelForCausalLM.from_pretrained(
         base_model,
-
+        dtype=torch.float16,
         device_map="auto",
         trust_remote_code=True,
     )
@@ -92,7 +99,6 @@ def generate_config(model, tokenizer, intent_text: str) -> str:
     generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     # Extract only the assistant's response (after the prompt)
-    # For chat-templated output, we need to strip the input prompt
     response = generated[len(prompt):].strip()
 
     # Try to extract JSON if wrapped in markdown
@@ -106,16 +112,14 @@ def generate_config(model, tokenizer, intent_text: str) -> str:
 def validate_json(text: str) -> tuple[bool, dict | None]:
     """Try to parse response as JSON. Returns (success, parsed)."""
     try:
-        # Remove any trailing non-JSON text
         text = text.strip()
-        # Find first { and last }
         start = text.find("{")
         end = text.rfind("}")
         if start != -1 and end != -1 and end > start:
             text = text[start:end + 1]
         parsed = json.loads(text)
         return True, parsed
-    except json.JSONDecodeError
+    except json.JSONDecodeError:
         return False, None
 
 
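The second hunk shows only the start of load_model. In a typical LoRA inference script the function would go on to attach the saved adapters and load the tokenizer; a minimal sketch of that pattern, assuming the peft library (the continuation is not shown in this diff):

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(adapter_path: str, base_model: str):
    """Load base model + LoRA adapters (sketch, not the repo's code)."""
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        dtype=torch.float16,  # newer transformers; older releases spell this torch_dtype
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(model, adapter_path)  # attach LoRA weights from train.py
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    return model, tokenizer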
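One caveat on the response = generated[len(prompt):] slice kept in the third hunk: it assumes the decoded text begins with the prompt verbatim, which chat templates and skip_special_tokens can break. Slicing in token space avoids that drift; a sketch, with inputs and outputs standing in for the script's own variables around model.generate():

# Sketch, not the repo's code: drop the prompt in token space,
# then decode only the newly generated tokens.
prompt_len = inputs["input_ids"].shape[1]   # number of prompt tokens fed to generate()
new_tokens = outputs[0][prompt_len:]        # generated continuation only
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()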
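The last hunk is the substantive bug fix: except json.JSONDecodeError without the trailing colon is a SyntaxError, so the old file would not even import. With the colon restored, the brace scan in validate_json tolerates chatter around the JSON object. Illustrative calls (made-up values, not from the repo):

ok, cfg = validate_json('Here is the config: {"vlan": 120, "name": "iot"} Hope that helps!')
# ok == True, cfg == {"vlan": 120, "name": "iot"}

ok, cfg = validate_json("no json here")
# ok == False, cfg is None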