| --- |
| language: [id] |
| license: apache-2.0 |
| tags: [text-classification, dfk, indonesian, disinformasi, fitnah, ujaran-kebencian] |
| base_model: unsloth/Qwen3.5-9B |
| --- |
| |
| # DFK Qwen3.5-9B |
|
|
| Fine-tuned dari `unsloth/Qwen3.5-9B` untuk klasifikasi konten berbahaya Bahasa Indonesia (4 kelas). |
|
|
| ## Kelas: Fakta, Disinformasi, Fitnah, Ujaran Kebencian |
|
|
| ## Cara Pakai |
| ```python |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import torch, re |
| |
| tokenizer = AutoTokenizer.from_pretrained("ggapar/Qwen3.5-9B-DFK") |
| model = AutoModelForCausalLM.from_pretrained( |
| "ggapar/Qwen3.5-9B-DFK", torch_dtype=torch.bfloat16, device_map="auto" |
| ) |
| |
| SYSTEM = ( |
| "Anda adalah sistem deteksi konten DFK. " |
| "Klasifikasikan teks ke dalam: Fakta, Disinformasi, Fitnah, atau Ujaran Kebencian." |
| ) |
| |
| def classify(text): |
| msgs = [ |
| {"role":"system","content":SYSTEM}, |
| {"role":"user","content":f"Klasifikasikan: |
| {text}"}, |
| ] |
| prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True) |
| inputs = tokenizer(prompt, return_tensors="pt").to(model.device) |
| with torch.inference_mode(): |
| out = model.generate(**inputs, max_new_tokens=256, do_sample=False) |
| resp = tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True) |
| # Parse [LABEL] dan [REASONING] |
| lbl_m = re.search(r'label\s*:\s*\*{0,2}([\w\s]+?)\*{0,2}[\.,]', resp.lower()) |
| rsn_m = re.search(r'penjelasan\s*:\s*(.*)', resp, re.DOTALL|re.IGNORECASE) |
| return { |
| "[LABEL]" : lbl_m.group(1).strip().upper() if lbl_m else "UNKNOWN", |
| "[REASONING]": rsn_m.group(1).strip()[:300] if rsn_m else resp, |
| "raw" : resp, |
| } |
| |
| # Example: classify one sample sentence and print the resulting dict. |
| print(classify("Vaksin COVID menyebabkan autism menurut dokter X.")) |
| ``` |
|
|