Sgridda committed on
Commit
a1f54c5
·
1 Parent(s): 487d58e

trying different model

Browse files
Files changed (2) hide show
  1. main.py +114 -178
  2. main_ai_version.py +0 -214
main.py CHANGED
@@ -1,73 +1,60 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
 
3
  import torch
4
- import logging
5
- import json
6
  import re
7
- import time
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Ultra-lightweight version with minimal AI
10
  app = FastAPI(
11
  title="AI Code Review Service",
12
- description="AI-powered code reviews with DistilGPT-2 and fallback to mock responses",
13
- version="2.0.0",
14
  )
15
 
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
 
20
- # Global variables for model
21
  model = None
22
  tokenizer = None
23
- model_loaded = False
24
-
25
- def load_simple_model():
26
- """Try to load the smallest possible model."""
27
- global model, tokenizer, model_loaded
28
-
29
- if model_loaded:
30
- return True
31
-
32
- try:
33
- logger.info("Attempting to load DistilGPT-2 model...")
34
- from transformers import AutoTokenizer, AutoModelForCausalLM
35
-
36
- model_name = "distilgpt2"
37
-
38
- # Load tokenizer
39
- tokenizer = AutoTokenizer.from_pretrained(model_name)
40
- if tokenizer.pad_token is None:
41
- tokenizer.pad_token = tokenizer.eos_token
42
-
43
- # Load model with conservative settings
44
  model = AutoModelForCausalLM.from_pretrained(
45
- model_name,
46
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
47
- device_map="auto" if torch.cuda.is_available() else "cpu",
48
- low_cpu_mem_usage=True
49
  )
50
-
51
- model_loaded = True
52
- logger.info("✅ DistilGPT-2 model loaded successfully!")
53
- return True
54
-
55
- except ImportError as e:
56
- logger.warning("❌ Transformers library not available: %s", str(e))
57
- return False
58
- except Exception as e:
59
- logger.warning("❌ Failed to load AI model: %s. Using mock responses only.", str(e))
60
- return False
61
-
62
- # Try to load model on startup (but don't block if it fails)
63
- try:
64
- model_loaded = load_simple_model()
65
- except Exception as e:
66
- logger.warning("Model loading failed during startup: %s", str(e))
67
- model_loaded = False
68
-
69
- # API Models
70
- class DiffRequest(BaseModel):
71
  diff: str
72
 
73
  class ReviewComment(BaseModel):
@@ -78,132 +65,81 @@ class ReviewComment(BaseModel):
78
  class ReviewResponse(BaseModel):
79
  comments: list[ReviewComment]
80
 
81
- # Root endpoint for consistency
82
- @app.get("/")
83
- def read_root():
84
- return {
85
- "message": "AI Code Review Service is running!",
86
- "version": "2.0.0",
87
- "model_loaded": model_loaded,
88
- "endpoints": ["/health", "/review", "/docs"]
89
- }
90
 
91
- @app.get("/health")
92
- def health_check():
93
- """Health check endpoint."""
94
- return {
95
- "status": "healthy",
96
- "service": "AI Code Review Service (Lightweight)",
97
- "model_loaded": model_loaded,
98
- "model_name": "distilgpt2" if model_loaded else "mock",
99
- "device": "cuda" if torch.cuda.is_available() else "cpu",
100
- "version": "2.0.0"
101
- }
102
-
103
- def smart_ai_review(diff: str):
104
- """AI review with timeout and better error handling."""
105
- if not model_loaded or not model or not tokenizer:
106
- return None
107
-
108
- try:
109
- # Simple but effective prompt
110
- prompt = f"Code review - suggest improvements:\n{diff[:300]}\nReview:"
111
-
112
- # Encode with proper settings
113
- inputs = tokenizer.encode(
114
- prompt,
115
- return_tensors="pt",
116
- max_length=400,
117
- truncation=True
118
  )
119
-
120
- start_time = time.time()
121
-
122
- # Generate with timeout protection
123
- with torch.no_grad():
124
- outputs = model.generate(
125
- inputs,
126
- max_new_tokens=60, # Balanced length
127
- do_sample=True, # More variety
128
- temperature=0.7, # Moderate creativity
129
- top_p=0.9, # Focus on likely tokens
130
- num_return_sequences=1,
131
- pad_token_id=tokenizer.eos_token_id,
132
- eos_token_id=tokenizer.eos_token_id,
133
- use_cache=True
134
- )
135
-
136
- generation_time = time.time() - start_time
137
- logger.info("AI generation completed in %.2f seconds", generation_time)
138
-
139
- # Decode response
140
- response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
141
- response = response.strip()
142
-
143
- # Clean up the response
144
- if len(response) > 200:
145
- response = response[:200] + "..."
146
-
147
- return response if response else None
148
-
149
- except Exception as e:
150
- logger.warning("AI generation failed: %s", str(e))
151
- return None
152
 
153
  @app.post("/review", response_model=ReviewResponse)
154
- def review_diff(request: DiffRequest):
155
- """Review endpoint with AI and fallback."""
156
-
157
- # Validate input
158
- if not request.diff or not request.diff.strip():
159
- raise HTTPException(status_code=400, detail="Diff content cannot be empty")
160
-
161
- logger.info("📝 Received diff for review (length: %d chars)", len(request.diff))
162
-
163
  start_time = time.time()
164
- ai_suggestion = None
165
-
166
- # Try AI first if available
167
- if model_loaded:
168
- logger.info("🤖 Attempting AI review...")
169
- ai_suggestion = smart_ai_review(request.diff)
170
-
171
- if ai_suggestion and len(ai_suggestion.strip()) > 10:
172
- # Use AI suggestion
173
- comments = [{
174
- "file_path": "code_file.py",
175
- "line_number": 1,
176
- "comment_text": f"🤖 AI Review: {ai_suggestion}"
177
- }]
178
- logger.info("✅ Returning AI-generated review")
179
 
180
- else:
181
- # Intelligent fallback based on diff content
182
- diff_lower = request.diff.lower()
183
 
184
- if "test" in diff_lower:
185
- comment = "Consider adding more comprehensive test cases and edge case validation."
186
- elif "function" in diff_lower or "def " in request.diff:
187
- comment = "This function looks good! Consider adding docstrings and error handling."
188
- elif "import" in diff_lower:
189
- comment = "New imports detected. Ensure all dependencies are documented in requirements."
190
- elif "class" in diff_lower:
191
- comment = "Nice class structure! Consider adding type hints and comprehensive docstrings."
192
- else:
193
- comment = "Code looks clean! Consider adding comments for complex logic and error handling."
194
-
195
- comments = [{
196
- "file_path": "code_file.py",
197
- "line_number": 1,
198
- "comment_text": f"📋 Quick Review: {comment}"
199
- }]
200
- logger.info("📋 Returning smart fallback review")
201
-
202
- total_time = time.time() - start_time
203
- logger.info("⚡ Total review time: %.2f seconds", total_time)
204
-
205
- return ReviewResponse(comments=comments)
206
-
207
- if __name__ == "__main__":
208
- import uvicorn
209
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
 
1
# Standard library
import json
import re

# Third-party
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- 1. Configuration ----------------------------------------------
# Hugging Face model id and the device inference would run on.
MODEL_NAME = "Salesforce/codegen-350M-mono"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- 2. FastAPI application ----------------------------------------
app = FastAPI(
    title="AI Code Review Service",
    description="An API to get AI-powered code reviews for pull request diffs.",
    version="1.0.0",
)

# --- 3. AI model state ---------------------------------------------
# Both are populated lazily by load_model() during server startup.
model = None
tokenizer = None
32
def load_model():
    """Load the tokenizer and model into the module-level globals.

    Idempotent: once ``model`` is set, subsequent calls are no-ops.
    The model is loaded on CPU in float32, matching the deployment
    target this service was written for.
    """
    global model, tokenizer
    if model is None:
        print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # CodeGen tokenizers ship without a pad token; fall back to EOS so
        # downstream generate() calls never see pad_token_id=None.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        print("Model loaded successfully.")
44
+
45
@app.on_event("startup")
async def startup_event():
    """Load the AI model exactly once when the server boots."""
    # NOTE(review): on_event is deprecated in recent FastAPI in favor of
    # lifespan handlers — worth migrating when touching app creation.
    print("Server starting up...")
    load_model()
52
+
53
+ # ----------------------------
54
+ # 4. API Request/Response Models
55
+ # ----------------------------
56
+
57
class ReviewRequest(BaseModel):
    """Request body for /review."""

    # Raw pull-request diff text to be reviewed.
    diff: str
59
 
60
  class ReviewComment(BaseModel):
 
65
class ReviewResponse(BaseModel):
    """Response body for /review: all generated review comments."""

    comments: list[ReviewComment]
67
 
68
+ # ----------------------------
69
+ # 5. The AI Review Logic
70
+ # ----------------------------
 
 
 
 
 
 
71
 
72
def run_ai_inference(diff: str) -> str:
    """Generate a one-line review of *diff* with the loaded model.

    Raises:
        RuntimeError: if load_model() has not populated the globals yet.
    """
    if not model or not tokenizer:
        raise RuntimeError("Model is not loaded.")

    # Comment-style prompt suits codegen-350M-mono (a code-completion model).
    # Diff is truncated to 800 chars to keep inference fast.
    prompt = f"# Review this code and suggest improvements:\n{diff[:800]}\n# Review:"
    encoded = tokenizer.encode(prompt, return_tensors="pt", max_length=1024, truncation=True)

    # Some tokenizers define only one of eos/pad; prefer EOS when present.
    end_token_id = (
        tokenizer.eos_token_id
        if tokenizer.eos_token_id is not None
        else tokenizer.pad_token_id
    )

    with torch.no_grad():
        generated = model.generate(
            encoded,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            num_return_sequences=1,
            pad_token_id=end_token_id,
            eos_token_id=end_token_id,
            use_cache=True,
        )

    # Strip the prompt tokens; decode only the model's continuation.
    completion = tokenizer.decode(generated[0][len(encoded[0]):], skip_special_tokens=True)

    # The review is the first non-empty line of the completion, with a
    # fixed fallback when the model produced nothing usable.
    return next(
        (line.strip() for line in completion.strip().split('\n') if line.strip()),
        "AI review completed - no specific issues found.",
    )
99
+
100
def parse_ai_response(response_text: str) -> list[ReviewComment]:
    """Wrap the raw model output in a single ReviewComment.

    codegen-350M-mono does not reliably emit structured JSON, so no
    parsing is attempted: the whole (stripped) review text becomes one
    comment anchored to line 1 of a placeholder file path.
    """
    return [ReviewComment(
        file_path="code_reviewed.py",
        line_number=1,
        comment_text=response_text.strip(),
    )]
110
+
111
+ # ----------------------------
112
+ # 6. The API Endpoint
113
+ # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
@app.post("/review", response_model=ReviewResponse)
async def get_code_review(request: ReviewRequest):
    """Review a code diff and return AI-generated comments.

    Returns HTTP 400 for an empty or whitespace-only diff and HTTP 500
    when inference or parsing fails unexpectedly.
    """
    # Reject whitespace-only diffs too, not just the empty string.
    if not request.diff or not request.diff.strip():
        raise HTTPException(status_code=400, detail="Diff content cannot be empty.")

    import time  # local import: only needed for request timing
    start_time = time.time()
    print(f"Starting review request at {start_time}")

    try:
        print("Running AI inference...")
        ai_response_text = run_ai_inference(request.diff)
        print(f"AI inference completed in {time.time() - start_time:.2f} seconds")

        print("Parsing AI response...")
        parsed_comments = parse_ai_response(ai_response_text)
        print(f"Total processing time: {time.time() - start_time:.2f} seconds")

        return ReviewResponse(comments=parsed_comments)

    except Exception as e:
        print(f"An unexpected error occurred after {time.time() - start_time:.2f} seconds: {e}")
        # Chain the original cause so server-side tracebacks stay useful.
        raise HTTPException(
            status_code=500,
            detail="An internal error occurred while processing the review.",
        ) from e
138
+
139
+ # ----------------------------
140
+ # 7. Health Check Endpoint
141
+ # ----------------------------
142
+
143
@app.get("/health")
async def health_check():
    """Liveness probe: reports whether the model has finished loading."""
    loaded = model is not None
    return {"status": "ok", "model_loaded": loaded}
 
 
 
 
 
 
 
 
 
 
 
 
 
main_ai_version.py DELETED
@@ -1,214 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
- import torch
5
- import re
6
- import json
7
-
8
- # ----------------------------
9
- # 1. Configuration
10
- # ----------------------------
11
-
12
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
13
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
-
15
- # ----------------------------
16
- # 2. FastAPI App Initialization
17
- # ----------------------------
18
-
19
- app = FastAPI(
20
- title="AI Code Review Service",
21
- description="An API to get AI-powered code reviews for pull request diffs.",
22
- version="1.0.0",
23
- )
24
-
25
- # ----------------------------
26
- # 3. AI Model Loading
27
- # ----------------------------
28
-
29
- model = None
30
- tokenizer = None
31
-
32
- def load_model():
33
- """Loads the model and tokenizer into memory."""
34
- global model, tokenizer
35
- if model is None:
36
- print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
37
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
38
-
39
- quantization_config = BitsAndBytesConfig(
40
- load_in_4bit=True,
41
- bnb_4bit_quant_type="nf4",
42
- bnb_4bit_compute_dtype=torch.bfloat16,
43
- bnb_4bit_use_double_quant=False,
44
- )
45
-
46
- model = AutoModelForCausalLM.from_pretrained(
47
- MODEL_NAME,
48
- trust_remote_code=True,
49
- quantization_config=quantization_config,
50
- device_map="auto",
51
- )
52
- print("Model loaded successfully.")
53
-
54
- @app.on_event("startup")
55
- async def startup_event():
56
- """
57
- On server startup, we trigger the model loading.
58
- """
59
- print("Server starting up...")
60
- load_model()
61
-
62
- # ----------------------------
63
- # 4. API Request/Response Models
64
- # ----------------------------
65
-
66
- class ReviewRequest(BaseModel):
67
- diff: str
68
-
69
- class ReviewComment(BaseModel):
70
- file_path: str
71
- line_number: int
72
- comment_text: str
73
-
74
- class ReviewResponse(BaseModel):
75
- comments: list[ReviewComment]
76
-
77
- # ----------------------------
78
- # 5. The AI Review Logic
79
- # ----------------------------
80
-
81
- def run_ai_inference(diff: str) -> str:
82
- """
83
- Runs the AI model to get the review.
84
- """
85
- if not model or not tokenizer:
86
- raise RuntimeError("Model is not loaded.")
87
-
88
- # Simplified, shorter prompt for faster inference
89
- messages = [
90
- {
91
- "role": "system",
92
- "content": "You are a code reviewer. Analyze the diff and respond with ONLY a JSON array. No other text, no markdown, no explanations. Just the JSON array with file_path, line_number, and comment_text fields."
93
- },
94
- {
95
- "role": "user",
96
- "content": f"Analyze this specific diff and provide review comments:\n{diff[:800]}" # Slightly reduced for faster processing
97
- }
98
- ]
99
-
100
- inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
101
-
102
- # Create attention mask to avoid warnings and improve reliability
103
- attention_mask = torch.ones_like(inputs)
104
-
105
- # Optimized generation parameters for speed
106
- outputs = model.generate(
107
- inputs,
108
- attention_mask=attention_mask,
109
- max_new_tokens=128, # Further reduced for faster generation
110
- do_sample=True, # Enable sampling to use temperature
111
- temperature=0.3, # Lower temperature for more focused output
112
- top_p=0.9, # Nucleus sampling for better quality
113
- num_return_sequences=1,
114
- eos_token_id=tokenizer.eos_token_id,
115
- pad_token_id=tokenizer.eos_token_id,
116
- use_cache=True # Enable KV cache for faster generation
117
- )
118
-
119
- response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
120
- return response_text.strip()
121
-
122
- def parse_ai_response(response_text: str) -> list[ReviewComment]:
123
- """
124
- Parses the raw text from the AI to extract the JSON array.
125
- """
126
- print(f"Raw AI Response:\n---\n{response_text}\n---")
127
-
128
- # Try to find JSON array, handling both direct JSON and markdown-wrapped JSON
129
- json_match = re.search(r'\[.*?\]', response_text, re.DOTALL)
130
- if not json_match:
131
- # Try to find JSON inside markdown code blocks
132
- markdown_match = re.search(r'```json\s*(\[.*?\])\s*```', response_text, re.DOTALL)
133
- if markdown_match:
134
- json_match = markdown_match
135
- json_string = markdown_match.group(1)
136
- else:
137
- print("Warning: Could not find a JSON array in the AI response.")
138
- # Return a simple fallback comment
139
- return [ReviewComment(
140
- file_path="unknown",
141
- line_number=1,
142
- comment_text="AI review completed - no specific issues found."
143
- )]
144
- else:
145
- json_string = json_match.group(0)
146
-
147
- try:
148
- comments_data = json.loads(json_string)
149
- if not isinstance(comments_data, list):
150
- print("Warning: AI response is not a JSON array")
151
- return [ReviewComment(
152
- file_path="unknown",
153
- line_number=1,
154
- comment_text="AI review completed - format issue."
155
- )]
156
-
157
- validated_comments = []
158
- for item in comments_data:
159
- try:
160
- validated_comments.append(ReviewComment(**item))
161
- except (TypeError, ValueError) as e:
162
- print(f"Skipping invalid comment: {item}, error: {e}")
163
-
164
- return validated_comments if validated_comments else [ReviewComment(
165
- file_path="unknown",
166
- line_number=1,
167
- comment_text="AI review completed - no valid comments found."
168
- )]
169
-
170
- except (json.JSONDecodeError, TypeError, KeyError) as e:
171
- print(f"Error parsing JSON from AI response: {e}")
172
- print(f"Invalid JSON string: {json_string}")
173
- # Return a simple fallback comment
174
- return [ReviewComment(
175
- file_path="unknown",
176
- line_number=1,
177
- comment_text="AI review completed - response format issue."
178
- )]
179
-
180
- # ----------------------------
181
- # 6. The API Endpoint
182
- # ----------------------------
183
-
184
- @app.post("/review", response_model=ReviewResponse)
185
- async def get_code_review(request: ReviewRequest):
186
- if not request.diff:
187
- raise HTTPException(status_code=400, detail="Diff content cannot be empty.")
188
-
189
- import time
190
- start_time = time.time()
191
- print(f"Starting review request at {start_time}")
192
-
193
- try:
194
- print("Running AI inference...")
195
- ai_response_text = run_ai_inference(request.diff)
196
- print(f"AI inference completed in {time.time() - start_time:.2f} seconds")
197
-
198
- print("Parsing AI response...")
199
- parsed_comments = parse_ai_response(ai_response_text)
200
- print(f"Total processing time: {time.time() - start_time:.2f} seconds")
201
-
202
- return ReviewResponse(comments=parsed_comments)
203
-
204
- except Exception as e:
205
- print(f"An unexpected error occurred after {time.time() - start_time:.2f} seconds: {e}")
206
- raise HTTPException(status_code=500, detail="An internal error occurred while processing the review.")
207
-
208
- # ----------------------------
209
- # 7. Health Check Endpoint
210
- # ----------------------------
211
-
212
- @app.get("/health")
213
- async def health_check():
214
- return {"status": "ok", "model_loaded": model is not None}