Sgridda committed on
Commit
39b69d9
·
1 Parent(s): 9d8ec9c

Re-enable TinyLlama model for actual inference

Browse files
Files changed (1) hide show
  1. main.py +102 -27
main.py CHANGED
@@ -1,34 +1,66 @@
1
-
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
 
 
4
  import re
5
  import json
6
 
7
  # ----------------------------
8
- # 1. FastAPI App Initialization
 
 
 
 
 
 
 
9
  # ----------------------------
10
 
11
  app = FastAPI(
12
- title="AI Code Review Service (Test Mode)",
13
- description="A test version of the API without a live AI model.",
14
  version="1.0.0",
15
  )
16
 
17
  # ----------------------------
18
- # 2. Mock AI Model Loading (Simulated)
19
  # ----------------------------
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  @app.on_event("startup")
22
  async def startup_event():
23
  """
24
- In this test version, we just print a message.
25
- We are not loading any real model.
26
  """
27
- print("Server starting up in test mode.")
28
- print("Model loading is disabled.")
29
 
30
  # ----------------------------
31
- # 3. API Request/Response Models
32
  # ----------------------------
33
 
34
  class ReviewRequest(BaseModel):
@@ -43,35 +75,78 @@ class ReviewResponse(BaseModel):
43
  comments: list[ReviewComment]
44
 
45
  # ----------------------------
46
- # 4. The API Endpoint (with Mocked Response)
47
  # ----------------------------
48
 
49
- @app.post("/review", response_model=ReviewResponse)
50
- async def get_code_review(request: ReviewRequest):
51
  """
52
- This endpoint now returns a hardcoded, successful response.
53
- It does not call an AI model.
54
  """
55
- print("Received request for /review. Returning mocked response.")
56
- if not request.diff:
57
- raise HTTPException(status_code=400, detail="Diff content cannot be empty.")
58
 
59
- # Create a fake response to prove the endpoint is working.
60
- mock_comments = [
 
 
 
61
  {
62
- "file_path": "src/mock/test.py",
63
- "line_number": 10,
64
- "comment_text": "This is a test comment from the mock server. If you see this, the API is working!"
65
  }
66
  ]
 
 
 
 
67
 
68
- return ReviewResponse(comments=[ReviewComment(**c) for c in mock_comments])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # ----------------------------
71
- # 5. Health Check Endpoint
72
  # ----------------------------
73
 
74
  @app.get("/health")
75
  async def health_check():
76
- """A simple endpoint to confirm the server is running."""
77
- return {"status": "ok", "model_loaded": False} # Model is not loaded in test mode
 
 
1
import asyncio
import json
import re

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
7
 
8
# ----------------------------
# 1. Configuration
# ----------------------------

# Hugging Face Hub identifier of the chat model used for reviews.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Prefer GPU when available; fall back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
15
# ----------------------------
# 2. FastAPI App Initialization
# ----------------------------

# Single application instance; routes below are registered on it.
app = FastAPI(
    title="AI Code Review Service",
    description="An API to get AI-powered code reviews for pull request diffs.",
    version="1.0.0",
)
24
 
25
# ----------------------------
# 3. AI Model Loading
# ----------------------------

# Module-level singletons populated once by load_model() at startup.
model = None
tokenizer = None

def load_model():
    """Load the tokenizer and model into the module-level globals.

    Idempotent: if the model is already loaded this is a no-op.
    On CUDA hosts the model is loaded with 4-bit NF4 quantization;
    on CPU-only hosts it is loaded unquantized, because bitsandbytes
    4-bit loading requires a GPU and would crash at startup otherwise.
    """
    global model, tokenizer
    if model is not None:
        return

    print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

    if DEVICE == "cuda":
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=False,
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            quantization_config=quantization_config,
            device_map="auto",
        )
    else:
        # Fix: the original applied the 4-bit quantization config
        # unconditionally, which fails on machines without CUDA even
        # though DEVICE was computed above and never consulted.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
        )
    print("Model loaded successfully.")
+
54
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers — confirm the pinned FastAPI version.
@app.on_event("startup")
async def startup_event():
    """
    On server startup, we trigger the model loading.
    """
    print("Server starting up...")
    # Blocks startup until the model (and its first-time download) is ready.
    load_model()
61
 
62
  # ----------------------------
63
+ # 4. API Request/Response Models
64
  # ----------------------------
65
 
66
  class ReviewRequest(BaseModel):
 
75
  comments: list[ReviewComment]
76
 
77
# ----------------------------
# 5. The AI Review Logic
# ----------------------------

def run_ai_inference(diff: str) -> str:
    """Run the chat model over *diff* and return its raw text response.

    Args:
        diff: The unified diff text to review.

    Returns:
        The model's decoded completion (prompt tokens stripped), trimmed.

    Raises:
        RuntimeError: if load_model() has not populated the globals yet.
    """
    if not model or not tokenizer:
        raise RuntimeError("Model is not loaded.")

    messages = [
        {
            "role": "system",
            "content": """You are an expert code reviewer. Your task is to analyze a pull request diff and provide constructive feedback.\nAnalyze the provided diff and identify potential issues, suggest improvements, or point out good practices.\n\nIMPORTANT: Respond with a JSON array of comment objects. Each object must have three fields: 'file_path', 'line_number', and 'comment_text'.\nThe 'file_path' should be the full path of the file being changed.\nThe 'line_number' must be an integer corresponding to the line number in the *new* version of the file where the comment applies.\nThe 'comment_text' should be your concise and clear review comment.\n\nExample response format:\n[\n {\n "file_path": "src/utils/helpers.py",\n "line_number": 42,\n "comment_text": "This function could be simplified by using a list comprehension."\n }\n]\n\nDo not add any introductory text or explanations outside of the JSON array.\n"""
        },
        {
            "role": "user",
            "content": f"Here is the diff to review:\n\n```diff\n{diff}\n```"
        }
    ]

    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    # Fix: with device_map="auto" the model weights may sit on GPU while
    # the tokenized inputs default to CPU; generate() then fails with a
    # device mismatch. Move the inputs to wherever the model landed.
    inputs = inputs.to(model.device)

    # Greedy decoding (do_sample=False). The original also passed
    # top_k/top_p/num_return_sequences, which are ignored when sampling
    # is off and only trigger generation-config warnings, so they are
    # dropped here; the generated tokens are unchanged.
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
    return response_text.strip()
105
+
106
def parse_ai_response(response_text: str) -> list[ReviewComment]:
    """Parse the raw model output and extract validated review comments.

    Searches the text for the first [...] span, decodes it as JSON, and
    validates each element as a ReviewComment. Any parse or validation
    failure degrades to an empty list rather than raising, so a badly
    formatted model response never becomes a server error.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        Validated comments, or [] when no usable JSON array is found.
    """
    print(f"Raw AI Response:\n---\n{response_text}\n---")

    # Greedy match grabs from the first '[' to the last ']' so nested
    # arrays inside the payload stay intact.
    json_match = re.search(r'\[.*\]', response_text, re.DOTALL)
    if not json_match:
        print("Warning: Could not find a JSON array in the AI response.")
        return []

    json_string = json_match.group(0)

    try:
        comments_data = json.loads(json_string)
        if not isinstance(comments_data, list):
            # Fix: a top-level JSON object would otherwise be iterated as
            # its keys and produce confusing TypeErrors downstream.
            print("Warning: AI response JSON is not an array.")
            return []
        return [ReviewComment(**item) for item in comments_data]
    except (ValueError, TypeError, KeyError) as e:
        # Fix: ValueError covers both json.JSONDecodeError and pydantic's
        # ValidationError (a ValueError subclass the original did not
        # catch), so schema violations from the model yield [] instead of
        # escaping to the endpoint as a 500.
        print(f"Error parsing JSON from AI response: {e}")
        print(f"Invalid JSON string: {json_string}")
        return []
127
+
128
# ----------------------------
# 6. The API Endpoint
# ----------------------------

@app.post("/review", response_model=ReviewResponse)
async def get_code_review(request: ReviewRequest):
    """Review a pull-request diff and return structured comments.

    Raises:
        HTTPException 400: when the submitted diff is empty.
        HTTPException 500: when inference or parsing fails unexpectedly.
    """
    if not request.diff:
        raise HTTPException(status_code=400, detail="Diff content cannot be empty.")

    try:
        # Fix: model.generate() is long-running, blocking CPU/GPU work;
        # calling it directly in an async endpoint froze the event loop
        # for every other request. Run it in a worker thread instead.
        ai_response_text = await asyncio.to_thread(run_ai_inference, request.diff)
        parsed_comments = parse_ai_response(ai_response_text)
        return ReviewResponse(comments=parsed_comments)

    except HTTPException:
        # Never convert deliberate HTTP errors into a generic 500.
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        # Chain the cause so tracebacks in the server log stay useful.
        raise HTTPException(status_code=500, detail="An internal error occurred while processing the review.") from e
145
 
146
# ----------------------------
# 7. Health Check Endpoint
# ----------------------------

@app.get("/health")
async def health_check():
    """Liveness probe: reports server status and whether the model global is populated."""
    return {"status": "ok", "model_loaded": model is not None}