# ================================================================ # ASAD AI — GRADIO APP v4.0 (HF Space compatible) # • Auto‑trains every 24h in background # • Persistent storage /data/ # • No localhost launch errors # ================================================================ import os, json, random, time, datetime, threading, logging import numpy as np import torch import gradio as gr from train import run_training, bow, clean, AsadAIModel, BASE_DATA from sklearn.preprocessing import LabelEncoder logging.basicConfig(level=logging.INFO, format="%(asctime)s [APP] %(message)s", datefmt="%H:%M:%S") log = logging.getLogger(__name__) STORAGE_DIR = os.environ.get("STORAGE_DIR", "/data") MODEL_PATH = os.path.join(STORAGE_DIR, "asad_ai_best.pth") INFO_PATH = os.path.join(STORAGE_DIR, "model_info.json") DATA_PATH = os.path.join(STORAGE_DIR, "training_data.json") RETRAIN_EVERY = 24 * 60 * 60 _model = None _vocab = [] _le = None _all_data = BASE_DATA _last_trained = None _is_training = False _lock = threading.Lock() def load_from_disk(): global _model, _vocab, _le, _all_data, _last_trained try: if not os.path.exists(INFO_PATH) or not os.path.exists(MODEL_PATH): return False with open(INFO_PATH, 'r') as f: info = json.load(f) le = LabelEncoder() le.classes_ = np.array(info['tags']) m = AsadAIModel(info['input_size'], info['hidden_size'], info['output_size']) m.load_state_dict(torch.load(MODEL_PATH, map_location='cpu', weights_only=True)) m.eval() if os.path.exists(DATA_PATH): with open(DATA_PATH, 'r', encoding='utf-8') as f: _all_data = json.load(f) with _lock: _model = m _vocab = info['vocab'] _le = le if 'trained_at' in info: try: _last_trained = datetime.datetime.strptime(info['trained_at'], "%Y-%m-%d %H:%M:%S") except: _last_trained = datetime.datetime.now() log.info("✅ Loaded saved model") return True except Exception as e: log.warning(f"Load failed: {e}") return False def do_train(): global _model, _vocab, _le, _all_data, _last_trained, _is_training with _lock: _is_training = True try: result = run_training() if result: m, v, le, data = result with _lock: _model = m _vocab = v _le = le _all_data = data _last_trained = datetime.datetime.now() log.info("✅ New model active") else: load_from_disk() finally: with _lock: _is_training = False def scheduler_loop(): log.info("🕐 Scheduler started – training now, then every 24h") do_train() while True: time.sleep(RETRAIN_EVERY) do_train() def get_response(text, threshold=0.40): with _lock: m, v, le, data = _model, _vocab, _le, _all_data if m is None: return "⏳ Model abhi train ho raha hai – thodi der mein aao!", "loading", 0.0 b = bow(text, v) t = torch.FloatTensor(b).unsqueeze(0) with torch.no_grad(): out = m(t) probs = torch.softmax(out, dim=1) conf, cls = torch.max(probs, 1) conf_val = conf.item() tag = le.inverse_transform(cls.numpy())[0] if conf_val < threshold: tag = "unknown" for intent in data.get("intents", []): if intent["tag"] == tag and intent.get("responses"): return random.choice(intent["responses"]), tag, conf_val return "Maafi chahta hoon!", "unknown", 0.0 def chat_fn(message, history): if not message.strip(): return "" resp, _, _ = get_response(message) return resp def get_status(): with _lock: training = _is_training lt = _last_trained uptime = str(datetime.datetime.now() - _start_time).split('.')[0] if training: return f"### 🔄 Training in progress...\n⏳ Please wait.\n🕐 Uptime: `{uptime}`" if lt: nxt = lt + datetime.timedelta(seconds=RETRAIN_EVERY) rem = max(nxt - datetime.datetime.now(), datetime.timedelta(0)) h = int(rem.total_seconds() // 3600) m = int((rem.total_seconds() % 3600) // 60) return f"### ✅ Model ready\n📅 Last trained: `{lt.strftime('%Y-%m-%d %H:%M:%S')}`\n⏰ Next: `{h}h {m}m`\n🕐 Uptime: `{uptime}`" return f"### ⏳ Waiting for first training...\n🕐 Uptime: `{uptime}`" def get_info(): try: if not os.path.exists(INFO_PATH): return "No model yet." with open(INFO_PATH, 'r') as f: info = json.load(f) return "\n\n".join([ f"🎯 Best accuracy: `{info.get('best_acc','?')}%`", f"📚 Vocab size: `{len(info.get('vocab',[]))}`", f"🗂️ Intents: `{info.get('intents_n', len(info.get('tags',[])))}`", f"📝 Patterns: `{info.get('patterns_n','?')}`", f"⏱️ Training time: `{info.get('elapsed_s','?')}s`" ]) except Exception: return "Info not available." def get_logs(): log_file = os.path.join(STORAGE_DIR, "train_log.jsonl") try: if not os.path.exists(log_file): return "No logs yet." with open(log_file, 'r') as f: lines = f.readlines()[-8:] out = [] for line in lines: try: d = json.loads(line) out.append(f"[{d.get('ts','')}] {d.get('event','')} | loss={d.get('loss','?')} acc={d.get('acc','?')}%") except: out.append(line.strip()) return '\n'.join(out) except: return "Log read error." # ── Start background scheduler ── _start_time = datetime.datetime.now() load_from_disk() _thread = threading.Thread(target=scheduler_loop, daemon=True) _thread.start() # ── Gradio UI (HF Space safe) ── CSS = """ .gradio-container { max-width: 980px !important; margin: auto !important; } footer { display: none !important; } """ HEADER = """
Pakistan ka Bilingual AI Chatbot — Urdu & English
🇵🇰 Made in Pakistan 🔄 Auto-trains every 24h