---
language: [id]
license: apache-2.0
tags: [text-classification, dfk, indonesian, disinformasi, fitnah, ujaran-kebencian]
base_model: unsloth/Qwen3.5-9B
---

# DFK Qwen3.5-9B

Fine-tuned dari `unsloth/Qwen3.5-9B` untuk klasifikasi konten berbahaya Bahasa Indonesia (4 kelas).

## Kelas: Fakta | Disinformasi | Fitnah | Ujaran Kebencian

## Cara Pakai

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch, re

tokenizer = AutoTokenizer.from_pretrained("ggapar/Qwen3.5-9B-DFK")
model = AutoModelForCausalLM.from_pretrained(
    "ggapar/Qwen3.5-9B-DFK", torch_dtype=torch.bfloat16, device_map="auto"
)

SYSTEM = (
    "Anda adalah sistem deteksi konten DFK. "
    "Klasifikasikan teks ke dalam: Fakta, Disinformasi, Fitnah, atau Ujaran Kebencian."
)

def classify(text):
    msgs = [
        {"role":"system","content":SYSTEM},
        {"role":"user","content":f"Klasifikasikan: {text}"},
    ]
    prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        out = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    resp = tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    # Parse [LABEL] dan [REASONING]
    lbl_m = re.search(r'label\s*:\s*\*{0,2}([\w\s]+?)\*{0,2}[\.,]', resp.lower())
    rsn_m = re.search(r'penjelasan\s*:\s*(.*)', resp, re.DOTALL|re.IGNORECASE)
    return {
        "[LABEL]" : lbl_m.group(1).strip().upper() if lbl_m else "UNKNOWN",
        "[REASONING]": rsn_m.group(1).strip()[:300] if rsn_m else resp,
        "raw" : resp,
    }

print(classify("Vaksin COVID menyebabkan autism menurut dokter X."))
```