""" Achilles Code Scanner — HuggingFace Space AI-powered SAST: paste code, find vulnerabilities. Deploy: 1. Create Space on huggingface.co (Gradio SDK, T4 Small GPU) 2. Upload this directory 3. Set secrets: HF_MODEL (your fine-tuned SAST model or base model) """ import os import spaces import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer # ── Model ─────────────────────────────────────────────────────── MODEL_ID = os.environ.get("HF_MODEL", "Qwen/Qwen2.5-Coder-1.5B-Instruct") ADAPTER_ID = os.environ.get("HF_ADAPTER", "") SYSTEM_PROMPT = ( "You are Achilles, an elite AI Security Engineer. " "You ONLY report genuine vulnerabilities — you never raise false positives. " "You ALWAYS provide a response — never return empty output." ) print(f"Loading {MODEL_ID}...") tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( MODEL_ID, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True, ) if ADAPTER_ID: from peft import PeftModel model = PeftModel.from_pretrained(model, ADAPTER_ID) model.eval() print("Model ready!") # ── Inference (GPU allocated only during this call) ───────────── @spaces.GPU(duration=120) def scan_code(language: str, code: str, max_tokens: int = 1024) -> str: if not code.strip(): return "Paste some code to scan." user_msg = f"Analyze the following {language} code for security vulnerabilities:\n\n```{language}\n{code}\n```" prompt = ( f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n" f"<|im_start|>user\n{user_msg}<|im_end|>\n" f"<|im_start|>assistant\n" ) inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_tokens, temperature=0.3, top_p=0.9, do_sample=True, repetition_penalty=1.1, pad_token_id=tokenizer.pad_token_id, ) response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True) if "<|im_end|>" in response: response = response[:response.index("<|im_end|>")] return response.strip() # ── Examples ──────────────────────────────────────────────────── EXAMPLES = [ ["python", '''import sqlite3, sys def get_user(username): conn = sqlite3.connect("app.db") query = f"SELECT * FROM users WHERE username = '{username}'" return conn.execute(query).fetchone() print(get_user(sys.argv[1]))'''], ["javascript", '''const express = require('express'); const { exec } = require('child_process'); const app = express(); app.get('/ping', (req, res) => { exec(`ping -c 3 ${req.query.host}`, (err, stdout) => { res.send(`
${stdout}`);
});
});
app.listen(3000);'''],
["php", ''''''],
["c", '''#include AI-Powered Code Vulnerability Scanner
Built by HTS-ASPM
Achilles Code Scanner — Results are AI-generated. Always verify findings with manual review.
""") if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)