"""
Inference script for testing the fine-tuned telecom intent model.
Loads LoRA adapters and generates network configurations from natural language intents.
Usage on Kaggle:
python inference.py --intent "Deploy a low latency slice for autonomous drones in the harbor zone"
Or run with a file of intents:
python inference.py --input_file intents.txt --output_file configs.json
"""
import argparse
import json
import os
import re
import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# ============================================================================
# CONFIGURATION
# ============================================================================
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # must match train.py
ADAPTER_PATH = "./qwen2.5-7b-telecom-intent-lora" # output from train.py
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.1  # low for near-deterministic config generation
TOP_P = 0.95
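# Note: with do_sample=True a low temperature still samples from a sharpened
# distribution; pass do_sample=False to generate() for fully greedy decoding.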
def load_model(adapter_path: str, base_model: str):
"""Load base model + LoRA adapters."""
adapter_path = os.path.abspath(adapter_path)
if not os.path.isdir(adapter_path):
print(f"ERROR: Adapter path not found: {adapter_path}")
print("Run train.py first to generate adapters.")
sys.exit(1)
print(f"Loading base model: {base_model}")
model = AutoModelForCausalLM.from_pretrained(
base_model,
        torch_dtype=torch.float16,  # half precision; `torch_dtype` is the kwarg honored across transformers versions
device_map="auto",
trust_remote_code=True,
)
print(f"Loading LoRA adapters: {adapter_path}")
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(
base_model,
trust_remote_code=True,
padding_side="left",
)
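    # Left padding matters for decoder-only models when prompts are batched;
    # right padding would leave pad tokens between the prompt and generation.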
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
print("Model ready!")
return model, tokenizer
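# Optional: for slightly faster inference you can fold the adapters into the
# base weights with PEFT's merge_and_unload(), at the cost of hot-swapping:
#   model = model.merge_and_unload()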
def generate_config(model, tokenizer, intent_text: str) -> str:
"""Generate a network configuration from a natural language intent."""
messages = [
{
"role": "system",
"content": (
"You are a 5G/6G network orchestrator. "
"Given a natural language network intent, output a valid, "
"spec-compliant JSON network configuration. "
"Do not include any explanation — only the JSON configuration."
),
},
{"role": "user", "content": intent_text},
]
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=MAX_NEW_TOKENS,
temperature=TEMPERATURE,
top_p=TOP_P,
do_sample=True,
pad_token_id=tokenizer.pad_token_id,
eos_token_id=tokenizer.eos_token_id,
)
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(prompt) breaks once skip_special_tokens strips the chat-template markers.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
# Try to extract JSON if wrapped in markdown
json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", response, re.DOTALL)
if json_match:
response = json_match.group(1)
return response
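# Quick sanity check (illustrative intent string):
#   model, tok = load_model(ADAPTER_PATH, BASE_MODEL)
#   print(generate_config(model, tok, "Deploy a low latency slice for drones"))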
def validate_json(text: str) -> tuple[bool, dict | None]:
    """Try to parse the response as a JSON object. Returns (success, parsed)."""
    try:
        text = text.strip()
        # Trim any stray prose around the outermost JSON object
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]
        parsed = json.loads(text)
        # Reject top-level non-objects (e.g. a bare list or string)
        if not isinstance(parsed, dict):
            return False, None
        return True, parsed
    except json.JSONDecodeError:
        return False, None
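# Illustrative example of the brace-trimming fallback (keys are made up):
#   validate_json('Here it is: {"slice_type": "uRLLC", "zone": "harbor"} done')
#   -> (True, {"slice_type": "uRLLC", "zone": "harbor"})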
def main():
parser = argparse.ArgumentParser(description="Telecom Intent Inference")
parser.add_argument(
"--intent",
type=str,
default=None,
help="Single natural language intent string",
)
parser.add_argument(
"--input_file",
type=str,
default=None,
help="File with one intent per line",
)
parser.add_argument(
"--output_file",
type=str,
default="generated_configs.json",
help="Output JSON file for batch results",
)
parser.add_argument(
"--adapter_path",
type=str,
default=ADAPTER_PATH,
help="Path to LoRA adapters",
)
parser.add_argument(
"--base_model",
type=str,
default=BASE_MODEL,
help="Base model name",
)
args = parser.parse_args()
model, tokenizer = load_model(args.adapter_path, args.base_model)
intents = []
if args.intent:
intents = [args.intent]
elif args.input_file:
with open(args.input_file, "r") as f:
intents = [line.strip() for line in f if line.strip()]
else:
# Interactive mode
print("\nInteractive mode. Type 'quit' to exit.")
while True:
user_input = input("\nIntent> ")
if user_input.lower() in ("quit", "exit", "q"):
break
config = generate_config(model, tokenizer, user_input)
is_valid, parsed = validate_json(config)
print(f"\n{'=' * 60}")
print(f"Generated Config (valid={is_valid}):")
print(f"{'=' * 60}")
if is_valid:
print(json.dumps(parsed, indent=2))
else:
print(config)
return
# Batch processing
results = []
valid_count = 0
for i, intent in enumerate(intents):
print(f"\n[{i + 1}/{len(intents)}] Processing: {intent[:80]}...")
config = generate_config(model, tokenizer, intent)
is_valid, parsed = validate_json(config)
if is_valid:
valid_count += 1
results.append({
"intent": intent,
"generated_config": parsed if is_valid else config,
"json_valid": is_valid,
})
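    # Each record: {"intent": str, "generated_config": dict | str, "json_valid": bool}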
# Save results
with open(args.output_file, "w") as f:
json.dump(results, f, indent=2)
print(f"\n{'=' * 60}")
print(f"Batch complete: {valid_count}/{len(intents)} valid JSON configs")
print(f"Results saved to: {args.output_file}")
if __name__ == "__main__":
main()