Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- analyzers/ds_analyzer.py +1 -1
- analyzers/dsa_analyzer.py +1 -1
- analyzers/ml_analyzer.py +1 -1
- analyzers/web_analyzer.py +1 -1
- models/pytorch_model.py +6 -2
- services/analysis_service.py +3 -3
- services/reward_service.py +12 -8
- triage.py +2 -2
analyzers/ds_analyzer.py
CHANGED
|
@@ -45,7 +45,7 @@ def analyze_data_science_code(code: str, parsed: Dict[str, Any], complexity: Dic
|
|
| 45 |
|
| 46 |
return DomainAnalysis(
|
| 47 |
domain="data_science",
|
| 48 |
-
domain_score=max(0.05, round(score, 4)),
|
| 49 |
issues=issues,
|
| 50 |
suggestions=suggestions,
|
| 51 |
highlights={
|
|
|
|
| 45 |
|
| 46 |
return DomainAnalysis(
|
| 47 |
domain="data_science",
|
| 48 |
+
domain_score=max(0.05, min(0.99, round(score, 4))),
|
| 49 |
issues=issues,
|
| 50 |
suggestions=suggestions,
|
| 51 |
highlights={
|
analyzers/dsa_analyzer.py
CHANGED
|
@@ -37,7 +37,7 @@ def analyze_dsa_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, An
|
|
| 37 |
|
| 38 |
return DomainAnalysis(
|
| 39 |
domain="dsa",
|
| 40 |
-
domain_score=max(0.05, round(score, 4)),
|
| 41 |
issues=issues,
|
| 42 |
suggestions=suggestions,
|
| 43 |
highlights={
|
|
|
|
| 37 |
|
| 38 |
return DomainAnalysis(
|
| 39 |
domain="dsa",
|
| 40 |
+
domain_score=max(0.05, min(0.99, round(score, 4))),
|
| 41 |
issues=issues,
|
| 42 |
suggestions=suggestions,
|
| 43 |
highlights={
|
analyzers/ml_analyzer.py
CHANGED
|
@@ -49,7 +49,7 @@ def analyze_ml_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any
|
|
| 49 |
|
| 50 |
return DomainAnalysis(
|
| 51 |
domain="ml_dl",
|
| 52 |
-
domain_score=max(0.05, round(score, 4)),
|
| 53 |
issues=issues,
|
| 54 |
suggestions=suggestions,
|
| 55 |
highlights={
|
|
|
|
| 49 |
|
| 50 |
return DomainAnalysis(
|
| 51 |
domain="ml_dl",
|
| 52 |
+
domain_score=max(0.05, min(0.99, round(score, 4))),
|
| 53 |
issues=issues,
|
| 54 |
suggestions=suggestions,
|
| 55 |
highlights={
|
analyzers/web_analyzer.py
CHANGED
|
@@ -39,7 +39,7 @@ def analyze_web_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, An
|
|
| 39 |
|
| 40 |
return DomainAnalysis(
|
| 41 |
domain="web",
|
| 42 |
-
domain_score=max(0.05, round(score, 4)),
|
| 43 |
issues=issues,
|
| 44 |
suggestions=suggestions,
|
| 45 |
highlights={
|
|
|
|
| 39 |
|
| 40 |
return DomainAnalysis(
|
| 41 |
domain="web",
|
| 42 |
+
domain_score=max(0.05, min(0.99, round(score, 4))),
|
| 43 |
issues=issues,
|
| 44 |
suggestions=suggestions,
|
| 45 |
highlights={
|
models/pytorch_model.py
CHANGED
|
@@ -101,6 +101,10 @@ class PyTorchCodeAnalyzerModel:
|
|
| 101 |
self.backend_name = self._fallback.backend_name
|
| 102 |
self.notes = list(self._fallback.notes) + [f"Pretrained load failed: {type(exc).__name__}: {exc}"]
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
def _embed_texts(self, texts: Sequence[str]) -> torch.Tensor:
|
| 105 |
self._ensure_loaded()
|
| 106 |
if self._model is None or self._tokenizer is None:
|
|
@@ -132,7 +136,7 @@ class PyTorchCodeAnalyzerModel:
|
|
| 132 |
for domain, texts in DOMAIN_PROTOTYPES.items():
|
| 133 |
matrix = self._prototype_matrix(f"domain:{domain}", texts)
|
| 134 |
similarity = torch.matmul(candidate, matrix.T).max().item()
|
| 135 |
-
domain_scores[domain] =
|
| 136 |
|
| 137 |
high_matrix = self._prototype_matrix("quality:high", QUALITY_ANCHORS["high"])
|
| 138 |
low_matrix = self._prototype_matrix("quality:low", QUALITY_ANCHORS["low"])
|
|
@@ -142,7 +146,7 @@ class PyTorchCodeAnalyzerModel:
|
|
| 142 |
|
| 143 |
return {
|
| 144 |
"domain_scores": domain_scores,
|
| 145 |
-
"ml_quality_score":
|
| 146 |
"backend_name": self.backend_name,
|
| 147 |
"model_id": self.model_id,
|
| 148 |
"notes": list(self.notes),
|
|
|
|
| 101 |
self.backend_name = self._fallback.backend_name
|
| 102 |
self.notes = list(self._fallback.notes) + [f"Pretrained load failed: {type(exc).__name__}: {exc}"]
|
| 103 |
|
| 104 |
+
@staticmethod
|
| 105 |
+
def _clamp_score(value: float) -> float:
|
| 106 |
+
return round(max(0.01, min(0.99, float(value))), 4)
|
| 107 |
+
|
| 108 |
def _embed_texts(self, texts: Sequence[str]) -> torch.Tensor:
|
| 109 |
self._ensure_loaded()
|
| 110 |
if self._model is None or self._tokenizer is None:
|
|
|
|
| 136 |
for domain, texts in DOMAIN_PROTOTYPES.items():
|
| 137 |
matrix = self._prototype_matrix(f"domain:{domain}", texts)
|
| 138 |
similarity = torch.matmul(candidate, matrix.T).max().item()
|
| 139 |
+
domain_scores[domain] = self._clamp_score((similarity + 1.0) / 2.0)
|
| 140 |
|
| 141 |
high_matrix = self._prototype_matrix("quality:high", QUALITY_ANCHORS["high"])
|
| 142 |
low_matrix = self._prototype_matrix("quality:low", QUALITY_ANCHORS["low"])
|
|
|
|
| 146 |
|
| 147 |
return {
|
| 148 |
"domain_scores": domain_scores,
|
| 149 |
+
"ml_quality_score": self._clamp_score(float(ml_quality_score)),
|
| 150 |
"backend_name": self.backend_name,
|
| 151 |
"model_id": self.model_id,
|
| 152 |
"notes": list(self.notes),
|
services/analysis_service.py
CHANGED
|
@@ -17,7 +17,7 @@ from utils import estimate_complexity, parse_code_structure
|
|
| 17 |
def _lint_score(parsed: Dict[str, Any]) -> float:
|
| 18 |
"""Convert structural smells into a normalized lint-style score."""
|
| 19 |
|
| 20 |
-
score =
|
| 21 |
if not parsed.get("syntax_valid", True):
|
| 22 |
score -= 0.45
|
| 23 |
score -= min(parsed.get("long_lines", 0), 5) * 0.03
|
|
@@ -27,7 +27,7 @@ def _lint_score(parsed: Dict[str, Any]) -> float:
|
|
| 27 |
score -= 0.05
|
| 28 |
if parsed.get("docstring_ratio", 0.0) == 0.0 and parsed.get("function_names"):
|
| 29 |
score -= 0.08
|
| 30 |
-
return round(max(0.
|
| 31 |
|
| 32 |
|
| 33 |
class AnalysisService:
|
|
@@ -68,7 +68,7 @@ class AnalysisService:
|
|
| 68 |
scores["ml_dl"] += 0.1
|
| 69 |
if "while" in code or "for" in code:
|
| 70 |
scores["dsa"] += 0.05
|
| 71 |
-
return {key: round(min(value, 0.99), 4) for key, value in scores.items()}
|
| 72 |
|
| 73 |
def analyze(self, request: AnalyzeCodeRequest) -> AnalyzeCodeResponse:
|
| 74 |
"""Run the complete multi-domain analysis pipeline."""
|
|
|
|
| 17 |
def _lint_score(parsed: Dict[str, Any]) -> float:
|
| 18 |
"""Convert structural smells into a normalized lint-style score."""
|
| 19 |
|
| 20 |
+
score = 0.99
|
| 21 |
if not parsed.get("syntax_valid", True):
|
| 22 |
score -= 0.45
|
| 23 |
score -= min(parsed.get("long_lines", 0), 5) * 0.03
|
|
|
|
| 27 |
score -= 0.05
|
| 28 |
if parsed.get("docstring_ratio", 0.0) == 0.0 and parsed.get("function_names"):
|
| 29 |
score -= 0.08
|
| 30 |
+
return round(max(0.01, min(0.99, score)), 4)
|
| 31 |
|
| 32 |
|
| 33 |
class AnalysisService:
|
|
|
|
| 68 |
scores["ml_dl"] += 0.1
|
| 69 |
if "while" in code or "for" in code:
|
| 70 |
scores["dsa"] += 0.05
|
| 71 |
+
return {key: round(max(0.01, min(value, 0.99)), 4) for key, value in scores.items()}
|
| 72 |
|
| 73 |
def analyze(self, request: AnalyzeCodeRequest) -> AnalyzeCodeResponse:
|
| 74 |
"""Run the complete multi-domain analysis pipeline."""
|
services/reward_service.py
CHANGED
|
@@ -8,6 +8,10 @@ from schemas.response import ScoreBreakdown
|
|
| 8 |
class RewardService:
|
| 9 |
"""Compute reward scores from model, domain, lint, and complexity signals."""
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def compute(self, *, ml_score: float, domain_score: float, lint_score: float, complexity_penalty: float) -> ScoreBreakdown:
|
| 12 |
"""Apply dynamic reward shaping based on quality, errors, and completion."""
|
| 13 |
|
|
@@ -27,12 +31,12 @@ class RewardService:
|
|
| 27 |
),
|
| 28 |
)
|
| 29 |
return ScoreBreakdown(
|
| 30 |
-
ml_score=
|
| 31 |
-
domain_score=
|
| 32 |
-
lint_score=
|
| 33 |
-
complexity_penalty=
|
| 34 |
-
quality_signal=
|
| 35 |
-
error_reduction_signal=
|
| 36 |
-
completion_signal=
|
| 37 |
-
reward=
|
| 38 |
)
|
|
|
|
| 8 |
class RewardService:
|
| 9 |
"""Compute reward scores from model, domain, lint, and complexity signals."""
|
| 10 |
|
| 11 |
+
@staticmethod
|
| 12 |
+
def _clamp_score(value: float) -> float:
|
| 13 |
+
return round(max(0.01, min(0.99, float(value))), 4)
|
| 14 |
+
|
| 15 |
def compute(self, *, ml_score: float, domain_score: float, lint_score: float, complexity_penalty: float) -> ScoreBreakdown:
|
| 16 |
"""Apply dynamic reward shaping based on quality, errors, and completion."""
|
| 17 |
|
|
|
|
| 31 |
),
|
| 32 |
)
|
| 33 |
return ScoreBreakdown(
|
| 34 |
+
ml_score=self._clamp_score(ml_score),
|
| 35 |
+
domain_score=self._clamp_score(domain_score),
|
| 36 |
+
lint_score=self._clamp_score(lint_score),
|
| 37 |
+
complexity_penalty=self._clamp_score(complexity_penalty),
|
| 38 |
+
quality_signal=self._clamp_score(quality_signal),
|
| 39 |
+
error_reduction_signal=self._clamp_score(error_reduction_signal),
|
| 40 |
+
completion_signal=self._clamp_score(completion_signal),
|
| 41 |
+
reward=self._clamp_score(reward),
|
| 42 |
)
|
triage.py
CHANGED
|
@@ -182,7 +182,7 @@ def _repair_risk(label: IssueLabel, confidence: float, signal_count: int) -> str
|
|
| 182 |
|
| 183 |
|
| 184 |
def _clamp_unit(value: float) -> float:
|
| 185 |
-
return round(max(0.
|
| 186 |
|
| 187 |
|
| 188 |
def _lint_score(code: str) -> float:
|
|
@@ -190,7 +190,7 @@ def _lint_score(code: str) -> float:
|
|
| 190 |
if not stripped_lines:
|
| 191 |
return 0.2
|
| 192 |
|
| 193 |
-
score =
|
| 194 |
if any(len(line) > 88 for line in stripped_lines):
|
| 195 |
score -= 0.15
|
| 196 |
if any(line.rstrip() != line for line in stripped_lines):
|
|
|
|
| 182 |
|
| 183 |
|
| 184 |
def _clamp_unit(value: float) -> float:
|
| 185 |
+
return round(max(0.01, min(0.99, float(value))), 4)
|
| 186 |
|
| 187 |
|
| 188 |
def _lint_score(code: str) -> float:
|
|
|
|
| 190 |
if not stripped_lines:
|
| 191 |
return 0.2
|
| 192 |
|
| 193 |
+
score = 0.99
|
| 194 |
if any(len(line) > 88 for line in stripped_lines):
|
| 195 |
score -= 0.15
|
| 196 |
if any(line.rstrip() != line for line in stripped_lines):
|