Spaces:

lablab-ai-amd-developer-hackathon
/

rustvital-amd

Running

App Files Files Community

brainworm2024 committed 2 days ago

Commit

4a90885

1 Parent(s): dcb7e13

Final ROCm-ready: real inference, orchestrator, proof, polished UI, HIP comments

Browse files

Files changed (7) hide show

Cargo.toml +1 -1
src/handlers/triage.rs +20 -35
src/inference/qwen.rs +102 -10
src/main.rs +47 -6
src/orchestrator.rs +79 -0
src/proof.rs +13 -0
static/index.html +121 -0

Cargo.toml CHANGED Viewed

@@ -40,4 +40,4 @@ dotenvy = "0.15"
 [profile.release]
 lto = true
 codegen-units = 1
-opt-level = 3

 [profile.release]
 lto = true
 codegen-units = 1
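+opt-level = 3
+# NOTE(review): `hex = "0.4"` was fused onto the `opt-level` line (appended without a preceding newline);
+# it is a dependency and belongs under [dependencies], not [profile.release].
+hex = "0.4"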

src/handlers/triage.rs CHANGED Viewed

@@ -1,10 +1,7 @@
 use axum::{response::Json, http::StatusCode};
 use serde::{Deserialize, Serialize};
 use tracing::instrument;
-use crate::shield;
-use crate::inference;
-use crate::web3;
 #[derive(Debug, Deserialize)]
 pub struct TriageRequest {
@@ -15,11 +12,14 @@ pub struct TriageRequest {
 #[derive(Debug, Serialize)]
 pub struct TriageResponse {
     pub triage_result: String,
     pub transaction_hash: String,
-    /// Redacted text sent to the model (for audit/demo)
     pub redacted_prompt: String,
-    /// PII map (only for verification; never in production)
-    pub pii_map: Vec<shield::redact::PiiMatch>,
 }
 #[instrument(skip_all)]
@@ -28,37 +28,22 @@ pub async fn handle(
 ) -> Result<Json<TriageResponse>, (StatusCode, String)> {
     tracing::info!("Received triage request (consent_hash: {})", payload.consent_hash);
-    // 1. Zero‑Trust Shield: strip PII
-    let (redacted_note, pii_matches) = shield::redact::redact_pii(&payload.patient_note);
-    // 2. Inference on redacted text (GPU never sees PII)
-    let triage_result = inference::qwen::generate(&redacted_note)
-        .await
-        .map_err(|e| {
-            tracing::error!("Inference failed: {:?}", e);
-            (StatusCode::INTERNAL_SERVER_ERROR, "Inference engine error".into())
-        })?;
-    // 3. Filecoin CID (immutable record of redacted prompt + result)
-    let cid_input = format!("{}||{}", redacted_note, triage_result);
-    let cid = web3::filecoin::generate_cid(&cid_input)
-        .map_err(|e| {
-            tracing::error!("CID generation failed: {:?}", e);
-            (StatusCode::INTERNAL_SERVER_ERROR, "CID error".into())
-        })?;
-    // 4. Base L2 transaction (posts the CID)
-    let tx_hash = web3::base_tx::commit_cid(&cid)
         .await
         .map_err(|e| {
-            tracing::error!("Base L2 transaction failed: {:?}", e);
-            (StatusCode::INTERNAL_SERVER_ERROR, "Blockchain error".into())
         })?;
     Ok(Json(TriageResponse {
-        triage_result,
-        transaction_hash: tx_hash,
-        redacted_prompt: redacted_note,
-        pii_map: pii_matches,
     }))
-}

 use axum::{response::Json, http::StatusCode};
 use serde::{Deserialize, Serialize};
 use tracing::instrument;
+use crate::orchestrator;
 #[derive(Debug, Deserialize)]
 pub struct TriageRequest {
 #[derive(Debug, Serialize)]
 pub struct TriageResponse { // JSON body of a successful /triage call; `handle` copies every field from the orchestrator output
     pub triage_result: String, // text produced by the inference step
+    pub model_used: String, // model label reported by the inference step
+    pub device_info: String, // device description reported by the inference step
     pub transaction_hash: String, // hash returned by the on-chain commit of the CID
     pub redacted_prompt: String, // note text after PII redaction
+    pub pii_map: Vec<crate::shield::redact::PiiMatch>, // NOTE(review): each match carries the original PII text, so this returns un-redacted values to the client — confirm this is demo-only
+    pub cid: String, // content identifier of the audit record
+    pub redaction_proof: String, // hex SHA-256 digest from `proof::generate_proof`
+    pub agent_steps: Vec<orchestrator::AgentStep>, // per-stage status and timing
 }
 #[instrument(skip_all)]
 ) -> Result<Json<TriageResponse>, (StatusCode, String)> {
     tracing::info!("Received triage request (consent_hash: {})", payload.consent_hash);
+    let output = orchestrator::run_triage(&payload.patient_note)
         .await
         .map_err(|e| {
+            tracing::error!("Triage pipeline error: {:?}", e);
+            (StatusCode::INTERNAL_SERVER_ERROR, "Triage pipeline error".into())
         })?;
     Ok(Json(TriageResponse {
+        triage_result: output.triage_result,
+        model_used: output.model_used,
+        device_info: output.device_info,
+        transaction_hash: output.transaction_hash,
+        redacted_prompt: output.redacted_prompt,
+        pii_map: output.pii_map,
+        cid: output.cid,
+        redaction_proof: output.redaction_proof,
+        agent_steps: output.agent_steps,
     }))
+}

src/inference/qwen.rs CHANGED Viewed

@@ -1,10 +1,102 @@
-use anyhow::Result;
-/// Mock inference for local testing.
-/// In production (AMD Cloud), this will load the Qwen-72B model via Candle + ROCm.
-pub async fn generate(_redacted_prompt: &str) -> Result<String> {
-    tracing::info!("[MOCK] GPU inference skipped — returning placeholder");
-    // Simulate some processing
-    tokio::time::sleep(std::time::Duration::from_millis(10)).await;
-    Ok("Triage result: non‑urgent (mock)".to_string())
-}

+use anyhow::{Context, Result};
+use candle_core::{DType, Device, Tensor};
+use candle_nn::VarBuilder;
+use candle_transformers::generation::{LogitsProcessor, Sampling};
+use candle_transformers::models::qwen2::{Config, Model};
+use hf_hub::api::sync::Api;
+use tokenizers::Tokenizer;
+use std::sync::Arc;
+use tokio::sync::OnceCell;
+static MODEL: OnceCell<Arc<LoadedModel>> = OnceCell::const_new();
+struct LoadedModel { // bundle built once by `load_model` and cached in `MODEL`
+    model: Model, // Qwen2 network built from the downloaded config and weights
+    tokenizer: Tokenizer, // loaded from the repo's tokenizer.json
+    device: Device, // where the weights were placed (HIP device 0 or CPU)
+    model_name: String, // size label, "7B" or "0.5B"
+}
+/// Returns the process-wide model, loading it on first use and caching it in `MODEL`.
+///
+/// Configuration is read from the environment:
+/// - `FORCE_0_5B=1` selects `Qwen/Qwen2.5-0.5B-Instruct`; any other value selects the 7B model.
+/// - `ENABLE_ROCM=1` requests HIP device 0 and falls back to the CPU (with a warning) if the
+///   device cannot be created.
+///
+/// Weights are resolved in two steps: if the repository publishes a
+/// `model.safetensors.index.json` (sharded checkpoint), every shard named in its `weight_map`
+/// is fetched; otherwise the single `model.safetensors` file is used. Fetching only
+/// `model.safetensors` fails for checkpoints that are published as shards.
+///
+/// # Errors
+/// Returns an error if any file cannot be fetched or parsed, or if the model cannot be built.
+/// A failed initialisation is not cached, so the next call tries again.
+///
+/// NOTE(review): `hf_hub::api::sync` and the weight loading are blocking calls made from an
+/// async context; consider `tokio::task::spawn_blocking` if startup latency on the runtime matters.
+async fn load_model() -> Result<Arc<LoadedModel>> {
+    MODEL
+        .get_or_try_init(|| async {
+            let use_7b = std::env::var("FORCE_0_5B").unwrap_or_default() != "1";
+            let (model_id, model_name) = if use_7b {
+                ("Qwen/Qwen2.5-7B-Instruct", "7B")
+            } else {
+                ("Qwen/Qwen2.5-0.5B-Instruct", "0.5B")
+            };
+            let device = if std::env::var("ENABLE_ROCM").unwrap_or_default() == "1" {
+                Device::new_hip(0).unwrap_or_else(|e| {
+                    tracing::warn!("HIP device not available: {}; falling back to CPU", e);
+                    Device::Cpu
+                })
+            } else {
+                Device::Cpu
+            };
+            tracing::info!("Loading model {} on {:?}", model_id, device);
+            let api = Api::new()?;
+            let repo = api.model(model_id.to_string());
+            // Prefer the shard index when the repo has one; fall back to the single-file layout.
+            let weight_paths = match repo.get("model.safetensors.index.json") {
+                Ok(index_path) => {
+                    let index: serde_json::Value =
+                        serde_json::from_reader(std::fs::File::open(index_path)?)?;
+                    let weight_map = index
+                        .get("weight_map")
+                        .and_then(|m| m.as_object())
+                        .context("model.safetensors.index.json has no `weight_map` object")?;
+                    // Many tensors map to the same shard; a set keeps each file once, in a stable order.
+                    let shard_names: std::collections::BTreeSet<&str> =
+                        weight_map.values().filter_map(|v| v.as_str()).collect();
+                    if shard_names.is_empty() {
+                        anyhow::bail!("model.safetensors.index.json lists no weight shards");
+                    }
+                    let mut paths = Vec::with_capacity(shard_names.len());
+                    for shard in shard_names {
+                        let path = repo
+                            .get(shard)
+                            .with_context(|| format!("fetching weight shard {}", shard))?;
+                        paths.push(path);
+                    }
+                    paths
+                }
+                Err(_) => vec![repo.get("model.safetensors")?],
+            };
+            let config_path = repo.get("config.json")?;
+            let tokenizer_path = repo.get("tokenizer.json")?;
+            let config: Config = serde_json::from_reader(std::fs::File::open(config_path)?)?;
+            // SAFETY: the weights are memory-mapped from the local hub cache; this presumes the
+            // cached files are not modified or truncated while the mapping is alive.
+            let vb = unsafe { VarBuilder::from_mmaped_safetensors(&weight_paths, DType::F32, &device)? };
+            let model = Model::new(&config, vb)?;
+            let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(|e| anyhow::anyhow!(e))?;
+            Ok(Arc::new(LoadedModel { model, tokenizer, device, model_name: model_name.to_string() }))
+        })
+        .await
+        .map(Arc::clone)
+}
+/// Generates a triage answer for an already-redacted prompt.
+///
+/// Returns `(generated_text, model_used, device_info)`. The prompt is wrapped in the
+/// `<|im_start|>` / `<|im_end|>` chat markers and up to 250 new tokens are sampled
+/// (top-k 50, top-p 0.9, temperature 0.7), stopping early at the end-of-turn token.
+///
+/// If the model cannot be loaded, a fixed mock answer is returned with `model_used` set to
+/// `"mock"` instead of an error.
+/// NOTE(review): the mock answer states "non-urgent"; confirm that is an acceptable default
+/// for a triage result when the model is unavailable.
+///
+/// # Errors
+/// Returns an error if tokenisation or any tensor/model operation fails.
+pub async fn generate(redacted_prompt: &str) -> Result<(String, String, String)> {
+    match load_model().await {
+        Ok(loaded) => {
+            let prompt = format!("<|im_start|>user\n{}\n<|im_end|>\n<|im_start|>assistant\n", redacted_prompt);
+            let tokens = loaded.tokenizer.encode(prompt, true).map_err(|e| anyhow::anyhow!(e))?;
+            let input_ids = Tensor::new(tokens.get_ids(), &loaded.device)?.unsqueeze(0)?;
+            let mut output_ids = input_ids.clone();
+            let mut logits_processor = LogitsProcessor::from_sampling(Sampling::TopKTopP {
+                k: 50,
+                p: 0.9,
+                temperature: 0.7,
+            });
+            // Fallback is the Qwen2.5 id of `<|im_end|>` (151645); 151643 is `<|endoftext|>`,
+            // which would not stop generation at the end of the assistant turn.
+            let eos_token_id = loaded.tokenizer.token_to_id("<|im_end|>").unwrap_or(151645);
+            let max_new_tokens = 250;
+            // Collect ids and decode once after the loop: decoding token by token can split a
+            // multi-byte character across two tokens and corrupt the text.
+            let mut generated_ids: Vec<u32> = Vec::with_capacity(max_new_tokens);
+            // Candle currently recomputes full attention for each token.
+            // A KV cache would speed this up and is the first post‑hackathon optimisation.
+            // For real‑time streaming (SSE), the loop can yield tokens as they are sampled.
+            // NOTE(review): the sampled value is compared with a token id and also `unsqueeze`d
+            // like a tensor below — confirm both against the candle version in use.
+            for _ in 0..max_new_tokens {
+                let logits = loaded.model.forward(&output_ids)?.squeeze(1)?;
+                let next_token = logits_processor.sample(&logits)?;
+                if next_token == eos_token_id {
+                    break;
+                }
+                output_ids = Tensor::cat(&[output_ids, next_token.unsqueeze(0)?.unsqueeze(0)?], 1)?;
+                generated_ids.push(next_token as u32);
+            }
+            // Decoding stays best-effort, but a failure is now logged instead of silently dropped.
+            let decoded = loaded
+                .tokenizer
+                .decode(&generated_ids, false)
+                .unwrap_or_else(|e| {
+                    tracing::warn!("Failed to decode generated tokens: {}", e);
+                    String::new()
+                });
+            let generated_text = decoded.trim();
+            let device_info = format!("{:?}", loaded.device);
+            // Check after trimming so whitespace-only output also gets the placeholder message.
+            if generated_text.is_empty() {
+                Ok(("Unable to generate output.".to_string(), loaded.model_name.clone(), device_info))
+            } else {
+                Ok((generated_text.to_string(), loaded.model_name.clone(), device_info))
+            }
+        }
+        Err(e) => {
+            tracing::warn!("Model load failed: {}; falling back to mock", e);
+            Ok(("Triage result: non‑urgent (mock – model unavailable)".to_string(), "mock".to_string(), "CPU (fallback)".to_string()))
+        }
+    }
+}

src/main.rs CHANGED Viewed

@@ -1,29 +1,70 @@
-use axum::{routing::{get, post}, Router};
 use tower_http::trace::TraceLayer;
 use tracing_subscriber::EnvFilter;
-use rustvital_amd::handlers;
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     dotenvy::dotenv().ok();
     tracing_subscriber::fmt()
         .with_env_filter(EnvFilter::from_default_env().add_directive("rustvital_amd=debug".parse()?))
         .init();
     let port = std::env::var("PORT").unwrap_or_else(|_| "3000".to_string());
     let addr = format!("0.0.0.0:{}", port);
     tracing::info!("Starting RustVital-AMD server on {}", addr);
     let app = Router::new()
-        .route("/", get(|| async { "RustVital-AMD is running" }))
         .route("/health", get(|| async { "healthy" }))
         .route("/triage", post(handlers::triage::handle))
         .layer(TraceLayer::new_for_http());
     let listener = tokio::net::TcpListener::bind(&addr).await?;
     axum::serve(listener, app).await?;
     Ok(())
-}

+// ============================================================================
+// 🚀 AMD ROCm / HIP activation
+//
+// To run on real MI300X GPUs:
+//   1. Set environment variable ENABLE_ROCM=1
+//   2. Ensure the ROCm runtime is installed (hipcc, rocblas, etc.)
+//   3. The model will automatically use Device::new_hip(0)
+//   4. The /status endpoint will show "ROCm/HIP (MI300X)"
+// ============================================================================
+use axum::{routing::{get, post}, Router, response::Json};
+use serde::Serialize;
 use tower_http::trace::TraceLayer;
 use tracing_subscriber::EnvFilter;
+mod handlers;
+mod inference;
+mod lib;
+mod shield;
+mod web3;
+mod orchestrator;
+mod proof;
+#[derive(Serialize)]
+struct StatusResponse { // JSON body of GET /status
+    status: String, // always "running" (set in `status`)
+    model: String, // label of the model selected via FORCE_0_5B
+    device: String, // "ROCm/HIP (MI300X)" when ENABLE_ROCM=1, otherwise "CPU"
+}
 #[tokio::main]
 async fn main() -> anyhow::Result<()> { // entry point: set up logging, build the router, then serve
     dotenvy::dotenv().ok(); // load .env if present; a failure to load it is ignored
     tracing_subscriber::fmt()
         .with_env_filter(EnvFilter::from_default_env().add_directive("rustvital_amd=debug".parse()?)) // env-configured filter plus debug level for rustvital_amd
         .init();
     let port = std::env::var("PORT").unwrap_or_else(|_| "3000".to_string()); // defaults to 3000 when PORT is unset
     let addr = format!("0.0.0.0:{}", port); // listen on all interfaces
     tracing::info!("Starting RustVital-AMD server on {}", addr);
     let app = Router::new()
+        .route("/", get(serve_ui)) // embedded HTML UI
         .route("/health", get(|| async { "healthy" })) // fixed-string health check
+        .route("/status", get(status)) // configured model/device as JSON
         .route("/triage", post(handlers::triage::handle)) // triage pipeline
         .layer(TraceLayer::new_for_http()); // HTTP request tracing
     let listener = tokio::net::TcpListener::bind(&addr).await?;
     axum::serve(listener, app).await?;
     Ok(())
+}
+/// Serves the single-page UI; the HTML file is embedded into the binary at compile time.
+async fn serve_ui() -> axum::response::Html<&'static str> {
+    const INDEX_HTML: &str = include_str!("../static/index.html");
+    axum::response::Html(INDEX_HTML)
+}
+/// Reports the configured model and device as JSON.
+///
+/// Both values are derived from the environment, using the same rules as the model loader
+/// in `inference/qwen.rs`: `ENABLE_ROCM == "1"` requests HIP, and `FORCE_0_5B == "1"` selects
+/// the 0.5B model. Previously any value of `FORCE_0_5B` (even "0") was reported as 0.5B while
+/// the loader still picked 7B.
+///
+/// NOTE(review): this is the *requested* configuration. The loader falls back to the CPU when
+/// the HIP device cannot be created, and inference falls back to a mock when loading fails;
+/// neither is reflected here.
+async fn status() -> Json<StatusResponse> {
+    let device = if std::env::var("ENABLE_ROCM").unwrap_or_default() == "1" {
+        "ROCm/HIP (MI300X)"
+    } else {
+        "CPU"
+    };
+    let model = if std::env::var("FORCE_0_5B").unwrap_or_default() == "1" {
+        "0.5B (Qwen2.5-0.5B-Instruct)"
+    } else {
+        "7B (Qwen2.5-7B-Instruct)"
+    };
+    Json(StatusResponse {
+        status: "running".to_string(),
+        model: model.to_string(),
+        device: device.to_string(),
+    })
+}

src/orchestrator.rs ADDED Viewed

	@@ -0,0 +1,79 @@

+use crate::shield;
+use crate::inference;
+use crate::web3;
+use crate::proof;
+use anyhow::Result;
+use serde::Serialize;
+use std::time::Instant;
+#[derive(Debug, Serialize)]
+pub struct AgentStep { // one pipeline stage as reported to the client
+    pub name: String, // stage label: "Shield", "Triage" or "Audit"
+    pub status: String, // `run_triage` only ever records "completed"
+    pub duration_ms: u64, // elapsed time of the stage in milliseconds
+    pub reasoning: String, // short human-readable summary of what the stage did
+}
+#[derive(Debug, Serialize)]
+pub struct TriageOutput { // everything `run_triage` produces for one patient note
+    pub redacted_prompt: String, // note text after PII redaction; this is what the model receives
+    pub pii_map: Vec<shield::redact::PiiMatch>, // matches returned by the redactor alongside the redacted text
+    pub triage_result: String, // text returned by the inference step
+    pub model_used: String, // model label returned by the inference step
+    pub cid: String, // CID generated over "redacted_prompt||triage_result||redaction_proof"
+    pub transaction_hash: String, // value returned by `web3::base_tx::commit_cid` for the CID
+    pub redaction_proof: String, // output of `proof::generate_proof` for the original note and PII map
+    pub device_info: String, // device description returned by the inference step
+    pub agent_steps: Vec<AgentStep>, // one entry per stage, in execution order
+}
+/// Runs the three-stage triage pipeline and returns the combined output together with a
+/// timing record for each stage:
+///
+/// 1. Shield — redacts PII from the note and derives the redaction proof.
+/// 2. Triage — runs model inference on the redacted text only.
+/// 3. Audit  — generates a CID over the redacted prompt, result and proof, then commits it.
+///
+/// Inference runs on the device selected in qwen.rs; to enable AMD HIP, set ENABLE_ROCM=1
+/// before starting the server.
+///
+/// # Errors
+/// Propagates failures from inference, CID generation, or the commit of the CID.
+pub async fn run_triage(patient_note: &str) -> Result<TriageOutput> {
+    // Milliseconds elapsed since `since`, in the width `AgentStep` stores.
+    let elapsed_ms = |since: Instant| since.elapsed().as_millis() as u64;
+    let mut agent_steps = Vec::new();
+
+    // Stage 1: Shield
+    let shield_timer = Instant::now();
+    let (redacted_prompt, pii_map) = shield::redact::redact_pii(patient_note);
+    let redaction_proof = proof::generate_proof(patient_note, &pii_map);
+    let shield_ms = elapsed_ms(shield_timer);
+    agent_steps.push(AgentStep {
+        name: String::from("Shield"),
+        status: String::from("completed"),
+        duration_ms: shield_ms,
+        reasoning: format!("Detected {} PII entities, proof generated.", pii_map.len()),
+    });
+
+    // Stage 2: Triage (inference sees only the redacted text)
+    let inference_timer = Instant::now();
+    let (triage_result, model_used, device_info) =
+        inference::qwen::generate(&redacted_prompt).await?;
+    let inference_ms = elapsed_ms(inference_timer);
+    agent_steps.push(AgentStep {
+        name: String::from("Triage"),
+        status: String::from("completed"),
+        duration_ms: inference_ms,
+        reasoning: format!("Model Qwen2.5-{} on {}.", model_used, device_info),
+    });
+
+    // Stage 3: Audit
+    let audit_timer = Instant::now();
+    let audit_record = format!("{}||{}||{}", redacted_prompt, triage_result, redaction_proof);
+    let cid = web3::filecoin::generate_cid(&audit_record)?;
+    let transaction_hash = web3::base_tx::commit_cid(&cid).await?;
+    let audit_ms = elapsed_ms(audit_timer);
+    agent_steps.push(AgentStep {
+        name: String::from("Audit"),
+        status: String::from("completed"),
+        duration_ms: audit_ms,
+        reasoning: String::from("CID stored on Base Sepolia."),
+    });
+
+    Ok(TriageOutput {
+        redacted_prompt,
+        pii_map,
+        triage_result,
+        model_used,
+        cid,
+        transaction_hash,
+        redaction_proof,
+        device_info,
+        agent_steps,
+    })
+}

src/proof.rs ADDED Viewed

	@@ -0,0 +1,13 @@

+use sha2::{Digest, Sha256};
+use crate::shield::redact::PiiMatch;
+/// Computes the redaction proof: the hex-encoded SHA-256 digest of the original note
+/// followed by, for each PII match in order, its entity type, original text and placeholder.
+///
+/// NOTE(review): the fields are hashed back-to-back with no separators or length prefixes,
+/// so different field splits can feed identical bytes to the hasher — confirm that is
+/// acceptable for a proof.
+pub fn generate_proof(original: &str, pii_map: &[PiiMatch]) -> String {
+    let mut digest = Sha256::new();
+    digest.update(original.as_bytes());
+    pii_map.iter().for_each(|entry| {
+        digest.update(entry.entity_type.as_bytes());
+        digest.update(entry.original.as_bytes());
+        digest.update(entry.placeholder.as_bytes());
+    });
+    let hash = digest.finalize();
+    hex::encode(hash)
+}

static/index.html ADDED Viewed

	@@ -0,0 +1,121 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>RustVital‑AMD | Zero‑Trust Medical AI</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+    <style>
+        .htmx-indicator { display: none; }
+        .htmx-request .htmx-indicator { display: inline; }
+        .htmx-request.htmx-indicator { display: inline; }
+        .pii-highlight { background-color: #fee2e2; padding: 0 2px; border-radius: 3px; font-weight: bold; }
+    </style>
+</head>
+<body class="bg-gray-50 min-h-screen flex flex-col items-center p-4">
+    <div class="max-w-3xl w-full">
+        <div id="device-banner" class="bg-purple-100 text-purple-800 px-4 py-2 rounded-lg mb-2 text-sm text-center font-medium"></div>
+        <script>
+            // Fill the banner from /status. On an HTTP error or network failure, show a neutral
+            // message instead of leaving the banner blank with an unhandled promise rejection.
+            (function () {
+                const banner = document.getElementById('device-banner');
+                fetch('/status')
+                    .then(r => {
+                        if (!r.ok) {
+                            throw new Error(`/status returned ${r.status}`);
+                        }
+                        return r.json();
+                    })
+                    .then(s => {
+                        banner.textContent =
+                            `Running on ${s.device} – Model: ${s.model}`;
+                    })
+                    .catch(err => {
+                        console.error('Failed to load /status', err);
+                        banner.textContent = 'Server status unavailable';
+                    });
+            })();
+        </script>
+        <div class="bg-white rounded-2xl shadow-xl p-6 mb-6">
+            <div class="flex items-center gap-3 mb-4">
+                <span class="text-4xl">🏥</span>
+                <h1 class="text-2xl font-bold text-gray-800">RustVital‑AMD</h1>
+            </div>
+            <p class="text-gray-500 mb-4">Zero‑trust medical triage with on‑chain audit</p>
+            <div class="flex flex-col md:flex-row gap-4">
+                <div class="flex-1">
+                    <label class="block text-sm font-medium text-gray-700 mb-1">Original Note</label>
+                    <textarea id="patient-note" name="patient_note" rows="5"
+                        class="w-full border border-gray-300 rounded-lg p-3 focus:ring-2 focus:ring-purple-500 focus:border-transparent"
+                        placeholder="Enter patient note...">Patient John Smith, 45 yo, chest pain</textarea>
+                </div>
+                <div id="redacted-preview" class="flex-1 hidden">
+                    <label class="block text-sm font-medium text-gray-700 mb-1">Redacted (PII removed)</label>
+                    <div id="redacted-text" class="bg-gray-100 p-3 rounded-lg text-sm font-mono"></div>
+                </div>
+            </div>
+            <!-- hx-vals carries the js: prefix because the value is a JavaScript expression that
+                 reads the textarea at click time; without the prefix htmx parses the attribute as
+                 plain JSON, which this expression is not, and no values are sent.
+                 NOTE(review): htmx submits these values form-encoded by default; confirm the
+                 /triage handler accepts that encoding (or load the json-enc extension). -->
+            <button hx-post="/triage" hx-trigger="click" hx-target="#result" hx-indicator="#spinner"
+                hx-vals='js:{"patient_note": document.getElementById("patient-note").value, "consent_hash": "abc123"}'
+                class="mt-4 w-full bg-purple-600 hover:bg-purple-700 text-white font-semibold py-3 rounded-lg transition flex items-center justify-center gap-2">
+                <span>Start Triage</span>
+                <svg id="spinner" class="htmx-indicator animate-spin h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
+                    <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+                    <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"></path>
+                </svg>
+            </button>
+        </div>
+        <div id="result" class="space-y-4"></div>
+    </div>
+    <script>
+        // Escapes a value for safe interpolation into HTML (element content or quoted attributes).
+        // Everything rendered below comes from the patient note or the model, so it is untrusted.
+        function escapeHtml(value) {
+            const text = value == null ? '' : String(value);
+            const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
+            return text.replace(/[&<>"']/g, ch => entities[ch]);
+        }
+        // Wraps redaction placeholders such as [NAME_1] in a highlight span.
+        // The text is escaped first so only the spans added here are treated as markup.
+        function highlightPlaceholders(text) {
+            return escapeHtml(text).replace(/\[([A-Z_]+)_(\d+)\]/g, '<span class="pii-highlight">[$1_$2]</span>');
+        }
+        // After htmx finishes a successful /triage request, replace the raw JSON it swapped
+        // into #result with a formatted card and reveal the redacted preview.
+        document.body.addEventListener('htmx:afterOnLoad', function(evt) {
+            if (evt.detail.target.id === 'result' && evt.detail.xhr.status === 200) {
+                try {
+                    const data = JSON.parse(evt.detail.xhr.responseText);
+                    const redactedDiv = document.getElementById('redacted-preview');
+                    redactedDiv.classList.remove('hidden');
+                    document.getElementById('redacted-text').innerHTML = highlightPlaceholders(data.redacted_prompt);
+                    const stepsHtml = (data.agent_steps || []).map(step => `
+                        <div class="flex items-center gap-2 text-sm">
+                            <span class="px-2 py-1 bg-green-100 text-green-700 rounded-full">✅ ${escapeHtml(step.name)}</span>
+                            <span class="text-gray-500">${escapeHtml(step.reasoning)} (${escapeHtml(step.duration_ms)}ms)</span>
+                        </div>
+                    `).join('');
+                    const piiHtml = (data.pii_map || []).map(p => `
+                        <li class="text-sm">🔴 <strong>${escapeHtml(p.original)}</strong> → <code>${escapeHtml(p.placeholder)}</code></li>
+                    `).join('');
+                    // The hash is URL-encoded for the link target and HTML-escaped for display.
+                    const txHref = `https://sepolia.basescan.org/tx/${encodeURIComponent(data.transaction_hash == null ? '' : data.transaction_hash)}`;
+                    document.getElementById('result').innerHTML = `
+                        <div class="bg-white rounded-2xl shadow-xl p-6 space-y-4">
+                            <div class="text-sm text-purple-700 bg-purple-50 px-3 py-1 rounded-full inline-block">
+                                ${escapeHtml(data.device_info)} · Model: Qwen2.5-${escapeHtml(data.model_used)}
+                            </div>
+                            <div>
+                                <h3 class="font-semibold text-gray-700 mb-2">Agent Progress</h3>
+                                <div class="space-y-1">${stepsHtml}</div>
+                            </div>
+                            <div>
+                                <h3 class="font-semibold text-gray-700">Triage Result</h3>
+                                <div class="bg-purple-50 p-3 rounded-lg text-lg font-medium">${escapeHtml(data.triage_result)}</div>
+                            </div>
+                            <div>
+                                <h3 class="font-semibold text-gray-700">PII Redaction Map</h3>
+                                <ul class="list-disc list-inside text-sm text-gray-600">${piiHtml}</ul>
+                            </div>
+                            <div>
+                                <h3 class="font-semibold text-gray-700">Redaction Proof (SHA‑256)</h3>
+                                <code class="text-xs bg-gray-100 p-2 rounded block mt-1">${escapeHtml(data.redaction_proof)}</code>
+                            </div>
+                            <div>
+                                <h3 class="font-semibold text-gray-700">On‑Chain Audit</h3>
+                                <div class="text-sm">
+                                    <p><strong>CID:</strong> <code>${escapeHtml(data.cid)}</code></p>
+                                    <p><strong>Transaction:</strong> <a href="${escapeHtml(txHref)}" target="_blank" rel="noopener noreferrer" class="text-purple-600 underline">${escapeHtml(data.transaction_hash)}</a></p>
+                                </div>
+                            </div>
+                        </div>
+                    `;
+                } catch (err) {
+                    // Report the failure instead of swallowing it; textContent keeps the message inert.
+                    console.error('Failed to render triage response', err);
+                    document.getElementById('result').textContent = 'Could not display the triage response.';
+                }
+            }
+        });
+    </script>
+</body>
+</html>