| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import json, re |
|
|
# Small instruction-tuned extraction model from LiquidAI.
# device_map="auto" lets accelerate place the weights on GPU when one is
# available, otherwise CPU.
model_id = "LiquidAI/LFM2-350M-Extract"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
|
# System prompt that pins the extraction schema the model must follow.
# NOTE: this string is sent verbatim to the model, so its exact wording and
# formatting affect output quality — do not reflow or "tidy" it.
system_prompt = """Return data as a JSON object with the following schema:
- orders: list of objects:
 - product: Product name
 - price: Price as number without $ sign
 - quantity: Number of items as integer"""
|
|
def clean_result(parsed):
    """Normalize numeric fields in the model's parsed JSON output.

    The model sometimes emits prices as strings with currency symbols or
    thousands separators, and quantities as strings, despite the schema in
    ``system_prompt``.  This coerces ``price`` to float and ``quantity`` to
    int where possible, leaving unparseable values untouched.

    Args:
        parsed: Whatever ``json.loads`` produced.  Only a dict with an
            ``"orders"`` list of dicts is cleaned; any other shape is
            returned unchanged (previously a non-dict crashed with
            AttributeError, which ``extract_all`` does not catch).

    Returns:
        The same object, mutated in place where cleaning applied.
    """
    if not isinstance(parsed, dict):
        return parsed
    orders = parsed.get("orders", [])
    if not isinstance(orders, list):
        return parsed
    for order in orders:
        if not isinstance(order, dict):
            continue
        if "price" in order:
            # Strip currency symbol and thousands separators before parsing.
            price = str(order["price"]).replace("$", "").replace(",", "").strip()
            try:
                order["price"] = float(price)
            except ValueError:
                pass  # leave the original value for the user to inspect
        if "quantity" in order:
            # Schema declares quantity as an integer; coerce string digits.
            try:
                order["quantity"] = int(order["quantity"])
            except (ValueError, TypeError):
                pass
    return parsed
|
|
def extract_all(user_input):
    """Extract structured order data from free text via the LLM.

    Args:
        user_input: Raw text describing one or more orders.

    Returns:
        A pretty-printed JSON string matching the schema in
        ``system_prompt`` when the model output parses as JSON, otherwise
        the raw (fence-stripped) model response so the user can see what
        went wrong.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    inputs = tokenizer.apply_chat_template(
        messages, return_tensors="pt", return_dict=True,
        add_generation_prompt=True
    ).to(model.device)
    # Remember the prompt length so only newly generated tokens are decoded.
    input_len = inputs["input_ids"].shape[1]

    # Greedy decoding for deterministic extraction.  `temperature` is not
    # passed: with do_sample=False it is ignored and transformers warns
    # about it (and 0 would be invalid for sampling anyway).
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False,
        repetition_penalty=1.3,
    )
    response = tokenizer.decode(
        outputs[0][input_len:], skip_special_tokens=True
    ).strip()
    # Models often wrap JSON in Markdown code fences; strip them.
    response = re.sub(r'```json|```', '', response).strip()

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        # Not valid JSON — surface the raw text instead of failing.
        return response
    parsed = clean_result(parsed)
    return json.dumps(parsed, indent=2, ensure_ascii=False)
|
|
# Minimal Gradio UI: free-text input on the left, extracted JSON on the right.
demo = gr.Interface(
    fn=extract_all,
    inputs=gr.Textbox(label="Input Text", lines=8),
    outputs=gr.Textbox(label="Extracted JSON", lines=12)
)
demo.launch()