hellinferno committed on
Commit
bf2775e
·
1 Parent(s): 34a02ba

Prepare Hugging Face deployment

Browse files
.github/workflows/sync-to-hf.yml CHANGED
@@ -28,7 +28,8 @@ jobs:
28
  exit 0
29
  fi
30
 
31
- space_id="${HF_SPACE_ID:-${GITHUB_REPOSITORY_OWNER}/sql-query-reviewer}"
 
32
 
33
  git config user.email "actions@github.com"
34
  git config user.name "github-actions[bot]"
 
28
  exit 0
29
  fi
30
 
31
+ owner_slug="$(printf '%s' "${GITHUB_REPOSITORY_OWNER}" | tr '[:upper:]' '[:lower:]')"
32
+ space_id="${HF_SPACE_ID:-${owner_slug}/sql-query-reviewer}"
33
 
34
  git config user.email "actions@github.com"
35
  git config user.name "github-actions[bot]"
README.md CHANGED
@@ -126,6 +126,15 @@ This repo is Space-ready because:
126
  - the repo includes a root `Dockerfile`
127
  - the API listens on port `8000`
128
 
 
 
 
 
 
 
 
 
 
129
  To deploy manually from a local machine with git:
130
 
131
  ```bash
@@ -151,7 +160,7 @@ The Hugging Face sync workflow expects:
151
  If `HF_SPACE_ID` is not set, the workflow defaults to:
152
 
153
  ```text
154
- <github-repository-owner>/sql-query-reviewer
155
  ```
156
 
157
  ## Usage Example
 
126
  - the repo includes a root `Dockerfile`
127
  - the API listens on port `8000`
128
 
129
+ Recommended setup:
130
+ 1. Create a new Space at `https://huggingface.co/new-space`
131
+ 2. Set owner to your Hugging Face namespace, name to `sql-query-reviewer`, and SDK to `Docker`
132
+ 3. In GitHub, add repository secret `HF_TOKEN` with a Hugging Face token that can write to Spaces
133
+ 4. In GitHub, add repository variable `HF_SPACE_ID` with the exact repo id, for example `hellinferno/sql-query-reviewer`
134
+ 5. Push to `main` or run the `Sync To Hugging Face` workflow manually from the Actions tab
135
+
136
+ Using `HF_SPACE_ID` is the safest option because your Hugging Face namespace may not match your GitHub owner name exactly.
137
+
138
  To deploy manually from a local machine with git:
139
 
140
  ```bash
 
160
  If `HF_SPACE_ID` is not set, the workflow defaults to:
161
 
162
  ```text
163
+ <lowercased-github-repository-owner>/sql-query-reviewer
164
  ```
165
 
166
  ## Usage Example
server/environment.py CHANGED
@@ -114,7 +114,7 @@ class SQLReviewEnvironment:
114
 
115
  else:
116
  feedback = self._schema_feedback(task)
117
- info = {"context_shared": bool(task.schema)}
118
 
119
  state.total_reward += reward
120
 
 
114
 
115
  else:
116
  feedback = self._schema_feedback(task)
117
+ info = {"context_shared": bool(task.schema_info)}
118
 
119
  state.total_reward += reward
120
 
tests/test_api.py CHANGED
@@ -91,3 +91,20 @@ def test_identify_then_approve_can_finish_successfully() -> None:
91
  assert payload["reward"] > 0
92
  assert payload["info"]["final_score"] is not None
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  assert payload["reward"] > 0
92
  assert payload["info"]["final_score"] is not None
93
 
94
+
95
+ def test_request_more_context_returns_context_shared_flag() -> None:
96
+ client = build_client()
97
+ client.post("/reset", json={"task_id": "easy_001"})
98
+
99
+ response = client.post(
100
+ "/step",
101
+ json={"action_type": "request_more_context", "confidence": 0.7},
102
+ )
103
+
104
+ assert response.status_code == 200
105
+ payload = response.json()
106
+ assert payload["reward"] == 0.0
107
+ assert "context_shared" in payload["info"]
108
+ assert payload["info"]["context_shared"] is True
109
+ assert payload["done"] is False
110
+
tests/test_reward.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import pytest
4
+
5
+ from sql_query_reviewer.models import GroundTruthIssue, SQLReviewAction
6
+ from server.reward import compute_reward
7
+
8
+
9
+ def _action(action_type: str, confidence: float = 0.5) -> SQLReviewAction:
10
+ if action_type == "identify_issue":
11
+ return SQLReviewAction(
12
+ action_type="identify_issue",
13
+ issue_category="syntax",
14
+ issue_description="some issue",
15
+ confidence=confidence,
16
+ )
17
+ if action_type == "suggest_fix":
18
+ return SQLReviewAction(
19
+ action_type="suggest_fix",
20
+ suggested_fix="SELECT 1;",
21
+ confidence=confidence,
22
+ )
23
+ return SQLReviewAction(action_type=action_type, confidence=confidence)
24
+
25
+
26
+ def _issue(severity: float = 0.35) -> GroundTruthIssue:
27
+ return GroundTruthIssue(
28
+ id="test_issue_001",
29
+ category="syntax",
30
+ description="A test issue.",
31
+ severity=severity,
32
+ fix="SELECT 1;",
33
+ keywords=["test"],
34
+ )
35
+
36
+
37
+ # ── identify_issue ────────────────────────────────────────────────────────────
38
+
39
+ def test_identify_issue_duplicate_returns_small_penalty() -> None:
40
+ assert compute_reward(_action("identify_issue"), _issue(), duplicate_issue=True) == pytest.approx(-0.02)
41
+
42
+
43
+ def test_identify_issue_no_match_returns_penalty() -> None:
44
+ assert compute_reward(_action("identify_issue"), None) == pytest.approx(-0.1)
45
+
46
+
47
+ def test_identify_issue_match_no_fix_zero_confidence() -> None:
48
+ # base_reward = min(0.35, 0.35) = 0.35; fix_bonus = 0; confidence_bonus = 0
49
+ assert compute_reward(_action("identify_issue", confidence=0.0), _issue(0.35)) == pytest.approx(0.35)
50
+
51
+
52
+ def test_identify_issue_match_no_fix_full_confidence() -> None:
53
+ # base=0.35 + confidence_bonus=min(0.05, 1.0*0.05)=0.05 → 0.40, capped at 0.4
54
+ assert compute_reward(_action("identify_issue", confidence=1.0), _issue(0.35)) == pytest.approx(0.4)
55
+
56
+
57
+ def test_identify_issue_match_with_fix_zero_confidence() -> None:
58
+ # base=0.35 + fix_bonus=0.08 → 0.43, capped at 0.4
59
+ assert compute_reward(_action("identify_issue", confidence=0.0), _issue(0.35), fix_valid=True) == pytest.approx(0.4)
60
+
61
+
62
+ def test_identify_issue_high_severity_capped_at_035_base() -> None:
63
+ # min(0.9, 0.35) = 0.35
64
+ assert compute_reward(_action("identify_issue", confidence=0.0), _issue(severity=0.9)) == pytest.approx(0.35)
65
+
66
+
67
+ # ── suggest_fix ───────────────────────────────────────────────────────────────
68
+
69
+ def test_suggest_fix_without_previous_issue_is_penalized() -> None:
70
+ assert compute_reward(_action("suggest_fix"), None, has_previous_issue=False) == pytest.approx(-0.05)
71
+
72
+
73
+ def test_suggest_fix_with_previous_issue_invalid_fix() -> None:
74
+ assert compute_reward(_action("suggest_fix"), _issue(), has_previous_issue=True, fix_valid=False) == pytest.approx(0.0)
75
+
76
+
77
+ def test_suggest_fix_with_previous_issue_valid_fix() -> None:
78
+ assert compute_reward(_action("suggest_fix"), _issue(), has_previous_issue=True, fix_valid=True) == pytest.approx(0.1)
79
+
80
+
81
+ # ── approve ───────────────────────────────────────────────────────────────────
82
+
83
+ def test_approve_all_issues_found_gives_positive_reward() -> None:
84
+ assert compute_reward(_action("approve"), None, remaining_unfound=0) == pytest.approx(0.2)
85
+
86
+
87
+ def test_approve_one_issue_missed_gives_penalty() -> None:
88
+ assert compute_reward(_action("approve"), None, remaining_unfound=1) == pytest.approx(-0.15)
89
+
90
+
91
+ def test_approve_many_issues_missed_floors_at_negative_one() -> None:
92
+ # -0.15 * 7 = -1.05 → floored at -1.0
93
+ assert compute_reward(_action("approve"), None, remaining_unfound=7) == pytest.approx(-1.0)
94
+
95
+
96
+ # ── request_more_context ──────────────────────────────────────────────────────
97
+
98
+ def test_request_more_context_returns_zero() -> None:
99
+ assert compute_reward(_action("request_more_context"), None) == pytest.approx(0.0)