Sgridda committed on
Commit
a1f54c5
·
1 Parent(s): 487d58e

trying different model

Browse files
Files changed (2) hide show
  1. main.py +114 -178
  2. main_ai_version.py +0 -214
main.py CHANGED
@@ -1,73 +1,60 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
 
3
  import torch
4
- import logging
5
- import json
6
  import re
7
- import time
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Ultra-lightweight version with minimal AI
10
  app = FastAPI(
11
  title="AI Code Review Service",
12
- description="AI-powered code reviews with DistilGPT-2 and fallback to mock responses",
13
- version="2.0.0",
14
  )
15
 
16
- # Configure logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
 
20
- # Global variables for model
21
  model = None
22
  tokenizer = None
23
- model_loaded = False
24
-
25
- def load_simple_model():
26
- """Try to load the smallest possible model."""
27
- global model, tokenizer, model_loaded
28
-
29
- if model_loaded:
30
- return True
31
-
32
- try:
33
- logger.info("Attempting to load DistilGPT-2 model...")
34
- from transformers import AutoTokenizer, AutoModelForCausalLM
35
-
36
- model_name = "distilgpt2"
37
-
38
- # Load tokenizer
39
- tokenizer = AutoTokenizer.from_pretrained(model_name)
40
- if tokenizer.pad_token is None:
41
- tokenizer.pad_token = tokenizer.eos_token
42
-
43
- # Load model with conservative settings
44
  model = AutoModelForCausalLM.from_pretrained(
45
- model_name,
46
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
47
- device_map="auto" if torch.cuda.is_available() else "cpu",
48
- low_cpu_mem_usage=True
49
  )
50
-
51
- model_loaded = True
52
- logger.info("✅ DistilGPT-2 model loaded successfully!")
53
- return True
54
-
55
- except ImportError as e:
56
- logger.warning("❌ Transformers library not available: %s", str(e))
57
- return False
58
- except Exception as e:
59
- logger.warning("❌ Failed to load AI model: %s. Using mock responses only.", str(e))
60
- return False
61
-
62
- # Try to load model on startup (but don't block if it fails)
63
- try:
64
- model_loaded = load_simple_model()
65
- except Exception as e:
66
- logger.warning("Model loading failed during startup: %s", str(e))
67
- model_loaded = False
68
-
69
- # API Models
70
- class DiffRequest(BaseModel):
71
  diff: str
72
 
73
  class ReviewComment(BaseModel):
@@ -78,132 +65,81 @@ class ReviewComment(BaseModel):
78
  class ReviewResponse(BaseModel):
79
  comments: list[ReviewComment]
80
 
81
- # Root endpoint for consistency
82
- @app.get("/")
83
- def read_root():
84
- return {
85
- "message": "AI Code Review Service is running!",
86
- "version": "2.0.0",
87
- "model_loaded": model_loaded,
88
- "endpoints": ["/health", "/review", "/docs"]
89
- }
90
 
91
- @app.get("/health")
92
- def health_check():
93
- """Health check endpoint."""
94
- return {
95
- "status": "healthy",
96
- "service": "AI Code Review Service (Lightweight)",
97
- "model_loaded": model_loaded,
98
- "model_name": "distilgpt2" if model_loaded else "mock",
99
- "device": "cuda" if torch.cuda.is_available() else "cpu",
100
- "version": "2.0.0"
101
- }
102
-
103
- def smart_ai_review(diff: str):
104
- """AI review with timeout and better error handling."""
105
- if not model_loaded or not model or not tokenizer:
106
- return None
107
-
108
- try:
109
- # Simple but effective prompt
110
- prompt = f"Code review - suggest improvements:\n{diff[:300]}\nReview:"
111
-
112
- # Encode with proper settings
113
- inputs = tokenizer.encode(
114
- prompt,
115
- return_tensors="pt",
116
- max_length=400,
117
- truncation=True
118
  )
119
-
120
- start_time = time.time()
121
-
122
- # Generate with timeout protection
123
- with torch.no_grad():
124
- outputs = model.generate(
125
- inputs,
126
- max_new_tokens=60, # Balanced length
127
- do_sample=True, # More variety
128
- temperature=0.7, # Moderate creativity
129
- top_p=0.9, # Focus on likely tokens
130
- num_return_sequences=1,
131
- pad_token_id=tokenizer.eos_token_id,
132
- eos_token_id=tokenizer.eos_token_id,
133
- use_cache=True
134
- )
135
-
136
- generation_time = time.time() - start_time
137
- logger.info("AI generation completed in %.2f seconds", generation_time)
138
-
139
- # Decode response
140
- response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
141
- response = response.strip()
142
-
143
- # Clean up the response
144
- if len(response) > 200:
145
- response = response[:200] + "..."
146
-
147
- return response if response else None
148
-
149
- except Exception as e:
150
- logger.warning("AI generation failed: %s", str(e))
151
- return None
152
 
153
  @app.post("/review", response_model=ReviewResponse)
154
- def review_diff(request: DiffRequest):
155
- """Review endpoint with AI and fallback."""
156
-
157
- # Validate input
158
- if not request.diff or not request.diff.strip():
159
- raise HTTPException(status_code=400, detail="Diff content cannot be empty")
160
-
161
- logger.info("📝 Received diff for review (length: %d chars)", len(request.diff))
162
-
163
  start_time = time.time()
164
- ai_suggestion = None
165
-
166
- # Try AI first if available
167
- if model_loaded:
168
- logger.info("🤖 Attempting AI review...")
169
- ai_suggestion = smart_ai_review(request.diff)
170
-
171
- if ai_suggestion and len(ai_suggestion.strip()) > 10:
172
- # Use AI suggestion
173
- comments = [{
174
- "file_path": "code_file.py",
175
- "line_number": 1,
176
- "comment_text": f"🤖 AI Review: {ai_suggestion}"
177
- }]
178
- logger.info("✅ Returning AI-generated review")
179
 
180
- else:
181
- # Intelligent fallback based on diff content
182
- diff_lower = request.diff.lower()
183
 
184
- if "test" in diff_lower:
185
- comment = "Consider adding more comprehensive test cases and edge case validation."
186
- elif "function" in diff_lower or "def " in request.diff:
187
- comment = "This function looks good! Consider adding docstrings and error handling."
188
- elif "import" in diff_lower:
189
- comment = "New imports detected. Ensure all dependencies are documented in requirements."
190
- elif "class" in diff_lower:
191
- comment = "Nice class structure! Consider adding type hints and comprehensive docstrings."
192
- else:
193
- comment = "Code looks clean! Consider adding comments for complex logic and error handling."
194
-
195
- comments = [{
196
- "file_path": "code_file.py",
197
- "line_number": 1,
198
- "comment_text": f"📋 Quick Review: {comment}"
199
- }]
200
- logger.info("📋 Returning smart fallback review")
201
-
202
- total_time = time.time() - start_time
203
- logger.info("⚡ Total review time: %.2f seconds", total_time)
204
-
205
- return ReviewResponse(comments=comments)
206
-
207
- if __name__ == "__main__":
208
- import uvicorn
209
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
 
1
# Standard library
import json
import re

# Third-party
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- 1. Configuration ----------------------------------------------
# Hugging Face model id and the device inference would run on.
MODEL_NAME = "Salesforce/codegen-350M-mono"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- 2. FastAPI application ----------------------------------------
app = FastAPI(
    title="AI Code Review Service",
    description="An API to get AI-powered code reviews for pull request diffs.",
    version="1.0.0",
)

# --- 3. AI model state ---------------------------------------------
# Both are populated lazily by load_model() during server startup.
model = None
tokenizer = None
32
def load_model():
    """Load the tokenizer and model into the module-level globals.

    Idempotent: once ``model`` is set, subsequent calls are no-ops.
    The model is loaded on CPU in float32, matching the deployment
    target this service was written for.
    """
    global model, tokenizer
    if model is None:
        print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # CodeGen tokenizers ship without a pad token; fall back to EOS so
        # downstream generate() calls never see pad_token_id=None.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        print("Model loaded successfully.")
44
+
45
@app.on_event("startup")
async def startup_event():
    """Load the AI model exactly once when the server boots."""
    # NOTE(review): on_event is deprecated in recent FastAPI in favor of
    # lifespan handlers — worth migrating when touching app creation.
    print("Server starting up...")
    load_model()
52
+
53
+ # ----------------------------
54
+ # 4. API Request/Response Models
55
+ # ----------------------------
56
+
57
class ReviewRequest(BaseModel):
    """Request body for /review."""

    # Raw pull-request diff text to be reviewed.
    diff: str
59
 
60
  class ReviewComment(BaseModel):
 
65
class ReviewResponse(BaseModel):
    """Response body for /review: all generated review comments."""

    comments: list[ReviewComment]
67
 
68
+ # ----------------------------
69
+ # 5. The AI Review Logic
70
+ # ----------------------------
 
 
 
 
 
 
71
 
72
def run_ai_inference(diff: str) -> str:
    """Generate a one-line review of *diff* with the loaded model.

    Raises:
        RuntimeError: if load_model() has not populated the globals yet.
    """
    if not model or not tokenizer:
        raise RuntimeError("Model is not loaded.")

    # Comment-style prompt suits codegen-350M-mono (a code-completion model).
    # Diff is truncated to 800 chars to keep inference fast.
    prompt = f"# Review this code and suggest improvements:\n{diff[:800]}\n# Review:"
    encoded = tokenizer.encode(prompt, return_tensors="pt", max_length=1024, truncation=True)

    # Some tokenizers define only one of eos/pad; prefer EOS when present.
    end_token_id = (
        tokenizer.eos_token_id
        if tokenizer.eos_token_id is not None
        else tokenizer.pad_token_id
    )

    with torch.no_grad():
        generated = model.generate(
            encoded,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            num_return_sequences=1,
            pad_token_id=end_token_id,
            eos_token_id=end_token_id,
            use_cache=True,
        )

    # Strip the prompt tokens; decode only the model's continuation.
    completion = tokenizer.decode(generated[0][len(encoded[0]):], skip_special_tokens=True)

    # The review is the first non-empty line of the completion, with a
    # fixed fallback when the model produced nothing usable.
    return next(
        (line.strip() for line in completion.strip().split('\n') if line.strip()),
        "AI review completed - no specific issues found.",
    )
99
+
100
def parse_ai_response(response_text: str) -> list[ReviewComment]:
    """Wrap the raw model output in a single ReviewComment.

    codegen-350M-mono does not reliably emit structured JSON, so no
    parsing is attempted: the whole (stripped) review text becomes one
    comment anchored to line 1 of a placeholder file path.
    """
    return [ReviewComment(
        file_path="code_reviewed.py",
        line_number=1,
        comment_text=response_text.strip(),
    )]
110
+
111
+ # ----------------------------
112
+ # 6. The API Endpoint
113
+ # ----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
@app.post("/review", response_model=ReviewResponse)
async def get_code_review(request: ReviewRequest):
    """Review a code diff and return AI-generated comments.

    Returns HTTP 400 for an empty or whitespace-only diff and HTTP 500
    when inference or parsing fails unexpectedly.
    """
    # Reject whitespace-only diffs too, not just the empty string.
    if not request.diff or not request.diff.strip():
        raise HTTPException(status_code=400, detail="Diff content cannot be empty.")

    import time  # local import: only needed for request timing
    start_time = time.time()
    print(f"Starting review request at {start_time}")

    try:
        print("Running AI inference...")
        ai_response_text = run_ai_inference(request.diff)
        print(f"AI inference completed in {time.time() - start_time:.2f} seconds")

        print("Parsing AI response...")
        parsed_comments = parse_ai_response(ai_response_text)
        print(f"Total processing time: {time.time() - start_time:.2f} seconds")

        return ReviewResponse(comments=parsed_comments)

    except Exception as e:
        print(f"An unexpected error occurred after {time.time() - start_time:.2f} seconds: {e}")
        # Chain the original cause so server-side tracebacks stay useful.
        raise HTTPException(
            status_code=500,
            detail="An internal error occurred while processing the review.",
        ) from e
138
+
139
+ # ----------------------------
140
+ # 7. Health Check Endpoint
141
+ # ----------------------------
142
+
143
@app.get("/health")
async def health_check():
    """Liveness probe: reports whether the model has finished loading."""
    loaded = model is not None
    return {"status": "ok", "model_loaded": loaded}
 
 
 
 
 
 
 
 
 
 
 
 
 
main_ai_version.py DELETED
@@ -1,214 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
- import torch
5
- import re
6
- import json
7
-
8
- # ----------------------------
9
- # 1. Configuration
10
- # ----------------------------
11
-
12
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
13
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
-
15
- # ----------------------------
16
- # 2. FastAPI App Initialization
17
- # ----------------------------
18
-
19
- app = FastAPI(
20
- title="AI Code Review Service",
21
- description="An API to get AI-powered code reviews for pull request diffs.",
22
- version="1.0.0",
23
- )
24
-
25
- # ----------------------------
26
- # 3. AI Model Loading
27
- # ----------------------------
28
-
29
- model = None
30
- tokenizer = None
31
-
32
- def load_model():
33
- """Loads the model and tokenizer into memory."""
34
- global model, tokenizer
35
- if model is None:
36
- print(f"Loading model: {MODEL_NAME} on device: {DEVICE}...")
37
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
38
-
39
- quantization_config = BitsAndBytesConfig(
40
- load_in_4bit=True,
41
- bnb_4bit_quant_type="nf4",
42
- bnb_4bit_compute_dtype=torch.bfloat16,
43
- bnb_4bit_use_double_quant=False,
44
- )
45
-
46
- model = AutoModelForCausalLM.from_pretrained(
47
- MODEL_NAME,
48
- trust_remote_code=True,
49
- quantization_config=quantization_config,
50
- device_map="auto",
51
- )
52
- print("Model loaded successfully.")
53
-
54
- @app.on_event("startup")
55
- async def startup_event():
56
- """
57
- On server startup, we trigger the model loading.
58
- """
59
- print("Server starting up...")
60
- load_model()
61
-
62
- # ----------------------------
63
- # 4. API Request/Response Models
64
- # ----------------------------
65
-
66
- class ReviewRequest(BaseModel):
67
- diff: str
68
-
69
- class ReviewComment(BaseModel):
70
- file_path: str
71
- line_number: int
72
- comment_text: str
73
-
74
- class ReviewResponse(BaseModel):
75
- comments: list[ReviewComment]
76
-
77
- # ----------------------------
78
- # 5. The AI Review Logic
79
- # ----------------------------
80
-
81
- def run_ai_inference(diff: str) -> str:
82
- """
83
- Runs the AI model to get the review.
84
- """
85
- if not model or not tokenizer:
86
- raise RuntimeError("Model is not loaded.")
87
-
88
- # Simplified, shorter prompt for faster inference
89
- messages = [
90
- {
91
- "role": "system",
92
- "content": "You are a code reviewer. Analyze the diff and respond with ONLY a JSON array. No other text, no markdown, no explanations. Just the JSON array with file_path, line_number, and comment_text fields."
93
- },
94
- {
95
- "role": "user",
96
- "content": f"Analyze this specific diff and provide review comments:\n{diff[:800]}" # Slightly reduced for faster processing
97
- }
98
- ]
99
-
100
- inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
101
-
102
- # Create attention mask to avoid warnings and improve reliability
103
- attention_mask = torch.ones_like(inputs)
104
-
105
- # Optimized generation parameters for speed
106
- outputs = model.generate(
107
- inputs,
108
- attention_mask=attention_mask,
109
- max_new_tokens=128, # Further reduced for faster generation
110
- do_sample=True, # Enable sampling to use temperature
111
- temperature=0.3, # Lower temperature for more focused output
112
- top_p=0.9, # Nucleus sampling for better quality
113
- num_return_sequences=1,
114
- eos_token_id=tokenizer.eos_token_id,
115
- pad_token_id=tokenizer.eos_token_id,
116
- use_cache=True # Enable KV cache for faster generation
117
- )
118
-
119
- response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
120
- return response_text.strip()
121
-
122
- def parse_ai_response(response_text: str) -> list[ReviewComment]:
123
- """
124
- Parses the raw text from the AI to extract the JSON array.
125
- """
126
- print(f"Raw AI Response:\n---\n{response_text}\n---")
127
-
128
- # Try to find JSON array, handling both direct JSON and markdown-wrapped JSON
129
- json_match = re.search(r'\[.*?\]', response_text, re.DOTALL)
130
- if not json_match:
131
- # Try to find JSON inside markdown code blocks
132
- markdown_match = re.search(r'```json\s*(\[.*?\])\s*```', response_text, re.DOTALL)
133
- if markdown_match:
134
- json_match = markdown_match
135
- json_string = markdown_match.group(1)
136
- else:
137
- print("Warning: Could not find a JSON array in the AI response.")
138
- # Return a simple fallback comment
139
- return [ReviewComment(
140
- file_path="unknown",
141
- line_number=1,
142
- comment_text="AI review completed - no specific issues found."
143
- )]
144
- else:
145
- json_string = json_match.group(0)
146
-
147
- try:
148
- comments_data = json.loads(json_string)
149
- if not isinstance(comments_data, list):
150
- print("Warning: AI response is not a JSON array")
151
- return [ReviewComment(
152
- file_path="unknown",
153
- line_number=1,
154
- comment_text="AI review completed - format issue."
155
- )]
156
-
157
- validated_comments = []
158
- for item in comments_data:
159
- try:
160
- validated_comments.append(ReviewComment(**item))
161
- except (TypeError, ValueError) as e:
162
- print(f"Skipping invalid comment: {item}, error: {e}")
163
-
164
- return validated_comments if validated_comments else [ReviewComment(
165
- file_path="unknown",
166
- line_number=1,
167
- comment_text="AI review completed - no valid comments found."
168
- )]
169
-
170
- except (json.JSONDecodeError, TypeError, KeyError) as e:
171
- print(f"Error parsing JSON from AI response: {e}")
172
- print(f"Invalid JSON string: {json_string}")
173
- # Return a simple fallback comment
174
- return [ReviewComment(
175
- file_path="unknown",
176
- line_number=1,
177
- comment_text="AI review completed - response format issue."
178
- )]
179
-
180
- # ----------------------------
181
- # 6. The API Endpoint
182
- # ----------------------------
183
-
184
- @app.post("/review", response_model=ReviewResponse)
185
- async def get_code_review(request: ReviewRequest):
186
- if not request.diff:
187
- raise HTTPException(status_code=400, detail="Diff content cannot be empty.")
188
-
189
- import time
190
- start_time = time.time()
191
- print(f"Starting review request at {start_time}")
192
-
193
- try:
194
- print("Running AI inference...")
195
- ai_response_text = run_ai_inference(request.diff)
196
- print(f"AI inference completed in {time.time() - start_time:.2f} seconds")
197
-
198
- print("Parsing AI response...")
199
- parsed_comments = parse_ai_response(ai_response_text)
200
- print(f"Total processing time: {time.time() - start_time:.2f} seconds")
201
-
202
- return ReviewResponse(comments=parsed_comments)
203
-
204
- except Exception as e:
205
- print(f"An unexpected error occurred after {time.time() - start_time:.2f} seconds: {e}")
206
- raise HTTPException(status_code=500, detail="An internal error occurred while processing the review.")
207
-
208
- # ----------------------------
209
- # 7. Health Check Endpoint
210
- # ----------------------------
211
-
212
- @app.get("/health")
213
- async def health_check():
214
- return {"status": "ok", "model_loaded": model is not None}