anugrah55 committed on
Commit
ee14542
·
verified ·
1 Parent(s): 07e79cc

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (8.2 kB). View file
 
__pycache__/server.cpython-313.pyc ADDED
Binary file (1.68 kB). View file
 
openenv.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ name: opensleuth
2
+ version: 0.1.0
3
+ description: An OpenEnv environment for training LLMs to reverse-engineer black-box functions.
4
+ author: Gemini
5
+ contact: gemini@google.com
opensleuth_env/.DS_Store ADDED
Binary file (8.2 kB). View file
 
opensleuth_env/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the 'opensleuth_env' directory a Python package.
opensleuth_env/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (159 Bytes). View file
 
opensleuth_env/__pycache__/black_box.cpython-313.pyc ADDED
Binary file (1.28 kB). View file
 
opensleuth_env/__pycache__/env.cpython-313.pyc ADDED
Binary file (5.11 kB). View file
 
opensleuth_env/__pycache__/models.cpython-313.pyc ADDED
Binary file (1.89 kB). View file
 
opensleuth_env/__pycache__/verifier.cpython-313.pyc ADDED
Binary file (4.43 kB). View file
 
opensleuth_env/black_box.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def fibonacci(n: int) -> int:
    """Return the nth Fibonacci number (1-indexed: fib(1) == fib(2) == 1).

    Only positive integers up to 90 are accepted, which keeps results at a
    comfortable magnitude.

    Raises:
        ValueError: if n is not an int in the range [1, 90].
    """
    if not isinstance(n, int) or not 1 <= n <= 90:
        raise ValueError("Input must be a positive integer less than or equal to 90.")
    if n == 1:
        return 1
    prev, curr = 0, 1
    step = 1
    while step < n:
        prev, curr = curr, prev + curr
        step += 1
    return curr
# --- Add more black-box functions for later stages ---

def reverse_string(s: str) -> str:
    """Return *s* with its characters in reverse order.

    Raises:
        TypeError: if s is not a str.
    """
    if isinstance(s, str):
        return "".join(reversed(s))
    raise TypeError("Input must be a string.")
# --- Dictionary to hold all available black-box functions ---
# Maps a target name (as accepted by the environment's reset()) to its callable.
BLACK_BOX_FUNCTIONS = dict(
    fibonacci=fibonacci,
    reverse_string=reverse_string,
)
opensleuth_env/client.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # This file is intentionally left blank.
2
+ # It is used to create a client package if we were to publish this.
3
+ # For the hackathon, we will interact with the server directly.
opensleuth_env/env.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opensleuth_env.models import Action, Observation, State, ProbeAction, SubmitAction
2
+ from opensleuth_env.black_box import BLACK_BOX_FUNCTIONS
3
+ from opensleuth_env.verifier import verify_submission
4
+ import random
5
+ import traceback
6
+
class OpenSleuthEnv:
    """Black-box reverse-engineering environment.

    The agent probes a hidden function with arbitrary inputs (earning
    intrinsic reward for novel outputs and novel error types) and eventually
    submits Python code, which is fuzz-tested against the hidden function.
    """

    def __init__(self):
        # Populated by reset(); step() auto-resets if it is still None.
        self.state = None

    def reset(self, target_name: str = "fibonacci") -> Observation:
        """
        Resets the environment to a new episode.
        Selects a black-box function and clears the history.

        Raises:
            ValueError: if target_name is not a registered black-box function.
        """
        if target_name not in BLACK_BOX_FUNCTIONS:
            raise ValueError(f"Unknown target function: {target_name}")

        self.state = State(
            target_function_name=target_name,
            probe_history=[],
            seen_outputs=set(),
            seen_error_types=set(),
        )
        return Observation(probe_history=[], last_error="")

    def get_state(self):
        """Return the current episode State (None before the first reset).

        BUGFIX: server.py's /state endpoint calls env.get_state(), which did
        not exist and raised AttributeError at request time.
        """
        return self.state

    def step(self, action: Action) -> tuple[Observation, float, bool]:
        """Apply one action and return (observation, reward, done)."""
        if self.state is None:
            # If reset() was not called, do it now.
            self.reset()

        # The Pydantic model binding in FastAPI should handle the conversion.
        # This check is for robustness when step() is called with a raw dict.
        if not isinstance(action, (ProbeAction, SubmitAction)):
            try:
                if action.get("action_type") == "probe":
                    action = ProbeAction(**action)
                elif action.get("action_type") == "submit":
                    action = SubmitAction(**action)
                else:
                    raise ValueError("Invalid action_type")
            except Exception as e:
                # Malformed action: heavy penalty and episode termination.
                obs = Observation(probe_history=self.state.probe_history, last_error=f"Invalid action format: {e}")
                return obs, -20.0, True

        if action.action_type == "probe":
            return self._handle_probe(action)
        elif action.action_type == "submit":
            return self._handle_submit(action)
        else:
            obs = Observation(probe_history=self.state.probe_history, last_error=f"Invalid action type: {action.action_type}")
            return obs, -20.0, True

    def _handle_probe(self, action: ProbeAction) -> tuple[Observation, float, bool]:
        """Run the hidden function on the probe input.

        Rewards: +2.0 for a never-before-seen output (keyed by str()),
        +5.0 for a never-before-seen exception type, minus a flat 1.0
        per-probe cost. Probing never ends the episode.
        """
        target_func = BLACK_BOX_FUNCTIONS[self.state.target_function_name]
        intrinsic_reward = 0.0
        last_error = ""

        try:
            eval_input = action.input
            output = target_func(eval_input)
            self.state.probe_history.append((eval_input, output))
            if str(output) not in self.state.seen_outputs:
                intrinsic_reward += 2.0
                self.state.seen_outputs.add(str(output))

        except Exception as e:
            error_type = type(e).__name__
            # The full traceback is surfaced to the agent as the "output".
            error_str = traceback.format_exc()
            self.state.probe_history.append((action.input, error_str))
            last_error = error_str
            if error_type not in self.state.seen_error_types:
                intrinsic_reward += 5.0
                self.state.seen_error_types.add(error_type)

        reward = intrinsic_reward - 1.0
        obs = Observation(probe_history=self.state.probe_history, last_error=last_error)
        return obs, reward, False

    def _handle_submit(self, action: SubmitAction) -> tuple[Observation, float, bool]:
        """Score the submitted replica code and end the episode.

        Total reward is fuzz-match percentage minus the complexity penalty;
        a perfect 100.0 match earns an extra +50.0 bonus.
        """
        target_func = BLACK_BOX_FUNCTIONS[self.state.target_function_name]
        execution_reward, complexity_penalty = verify_submission(action.code, target_func)
        total_reward = execution_reward - complexity_penalty
        if execution_reward == 100.0:
            total_reward += 50.0

        obs = Observation(probe_history=self.state.probe_history, last_error="")
        return obs, total_reward, True
opensleuth_env/models.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union, List, Tuple, Any, Literal
2
+ from pydantic import BaseModel, Field
3
+
class ProbeAction(BaseModel):
    # Literal discriminator lets pydantic/FastAPI select the right Action variant.
    action_type: Literal["probe"] = "probe"
    # Raw value forwarded verbatim to the hidden black-box function.
    input: Any

class SubmitAction(BaseModel):
    action_type: Literal["submit"] = "submit"
    # Python source text of the candidate replica function.
    code: str

# Union of all accepted actions; narrowed at runtime via action_type.
Action = Union[ProbeAction, SubmitAction]

class Observation(BaseModel):
    probe_history: List[Tuple[Any, Any]] = Field(
        ...,
        description="A list of (input, output) pairs from previous probes. Output can be a value or an error string."
    )
    last_error: str = Field(
        "",
        description="The error message from the last action, if any."
    )

class State(BaseModel):
    # Key into BLACK_BOX_FUNCTIONS identifying the hidden target of this episode.
    target_function_name: str
    probe_history: List[Tuple[Any, Any]]
    # Store unique outputs and error types to calculate intrinsic reward
    seen_outputs: set
    seen_error_types: set
opensleuth_env/verifier.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import random
3
+ import string
4
+ import math
5
+
6
+ class ComplexityVisitor(ast.NodeVisitor):
7
+ def __init__(self):
8
+ self.complexity = 1
9
+ def visit_If(self, node):
10
+ self.complexity += 1
11
+ self.generic_visit(node)
12
+ def visit_For(self, node):
13
+ self.complexity += 1
14
+ self.generic_visit(node)
15
+ def visit_While(self, node):
16
+ self.complexity += 1
17
+ self.generic_visit(node)
18
+ def visit_And(self, node):
19
+ self.complexity += 1
20
+ self.generic_visit(node)
21
+ def visit_Or(self, node):
22
+ self.complexity += 1
23
+ self.generic_visit(node)
24
+ def visit_ExceptHandler(self, node):
25
+ self.complexity += 1
26
+ self.generic_visit(node)
27
+
28
+ def _calculate_cyclomatic_complexity(code: str) -> int:
29
+ try:
30
+ tree = ast.parse(code)
31
+ visitor = ComplexityVisitor()
32
+ visitor.visit(tree)
33
+ return math.log(visitor.complexity)
34
+ except SyntaxError:
35
+ return 50
36
+
37
+ def _generate_fuzz_inputs(target_func, count=100):
38
+ inputs = []
39
+ if target_func.__name__ == "fibonacci":
40
+ inputs = [random.randint(1, 90) for _ in range(count)]
41
+ elif target_func.__name__ == "reverse_string":
42
+ inputs = [''.join(random.choices(string.ascii_letters + string.digits, k=random.randint(1, 20))) for _ in range(count)]
43
+ return inputs
44
+
def verify_submission(submitted_code: str, target_function: callable, fuzz_count: int = 100) -> tuple[float, float]:
    """Fuzz-test submitted code against the hidden target function.

    Returns:
        (execution_reward, complexity_penalty): execution_reward is the
        percentage (0.0-100.0) of random inputs on which the submission's
        output equals the target's; complexity_penalty comes from
        _calculate_cyclomatic_complexity. A submission that fails to exec,
        or does not define a callable with the target's name, scores
        (0.0, 50.0).

    NOTE(review): exec() runs untrusted agent code in-process with no
    sandboxing -- acceptable for a hackathon, not for production.
    """
    try:
        # BUGFIX: previously exec(code, {}, local_scope) gave every defined
        # function an empty __globals__, so any recursive (or mutually
        # referencing) submission raised NameError on every call. A single
        # namespace dict makes definitions visible to themselves.
        scope = {}
        exec(submitted_code, scope)
        submitted_func = scope.get(target_function.__name__)
        if not callable(submitted_func):
            return 0.0, 50.0
    except Exception:
        # Submission failed to execute at definition time.
        return 0.0, 50.0

    fuzz_inputs = _generate_fuzz_inputs(target_function, fuzz_count)
    matches = 0
    for inp in fuzz_inputs:
        try:
            target_output = target_function(inp)
            submitted_output = submitted_func(inp)
            if target_output == submitted_output:
                matches += 1
        except Exception:
            # Any exception on either side counts as a non-match.
            continue

    # Guard fuzz_count == 0 to avoid ZeroDivisionError on degenerate calls.
    execution_reward = 100.0 * (matches / fuzz_count) if fuzz_count else 0.0
    complexity_penalty = _calculate_cyclomatic_complexity(submitted_code)
    return execution_reward, complexity_penalty
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
server.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
from opensleuth_env.env import OpenSleuthEnv
from opensleuth_env.models import Action, Observation

# Single shared environment instance for the whole server process.
# NOTE(review): concurrent clients share this one episode -- confirm that is
# acceptable for the intended single-agent training setup.
app = FastAPI()
env = OpenSleuthEnv()

class ResetBody(BaseModel):
    # Name of the black-box function to reverse-engineer this episode.
    target_name: str = "fibonacci"

@app.post("/reset", response_model=Observation)
def reset_env(body: ResetBody):
    """Start a new episode and return the initial (empty) observation."""
    # Ensure the environment is reset for a new session
    return env.reset(target_name=body.target_name)

@app.post("/step")
def step_env(action: Action):
    """Apply a probe/submit action and return observation, reward, done."""
    # The environment now handles the case where it's not reset
    obs, reward, done = env.step(action)
    return {"observation": obs, "reward": reward, "done": done}

@app.get("/state")
def get_state():
    """Expose the raw episode state for debugging; {} before the first reset."""
    if env.state is None:
        return {}
    # BUGFIX: OpenSleuthEnv defined no get_state() method, so this endpoint
    # raised AttributeError; return the state attribute directly.
    return env.state
test_client.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import requests
import json

# The exact code to be submitted, without shell escaping issues.
# NOTE(review): inner indentation reconstructed from a formatting-mangled
# dump -- confirm it matches the intended replica.
FIB_REPLICA = """
def fibonacci(n: int) -> int:
    if not isinstance(n, int) or n <= 0 or n > 90:
        raise ValueError("Input must be a positive integer less than or equal to 90.")
    if n == 1:
        return 1
    a, b = 0, 1
    for _ in range(n - 1):
        a, b = b, a + b
    return b
"""

submit_action = {
    "action_type": "submit",
    "code": FIB_REPLICA,
}

BASE_URL = "http://127.0.0.1:8000"

# Start a fresh episode before stepping.
requests.post(f"{BASE_URL}/reset", json={"target_name": "fibonacci"})

# Submit the replica and show the verifier's verdict.
reply = requests.post(f"{BASE_URL}/step", json=submit_action)

print(reply.status_code)
print(reply.json())
train.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import requests
3
+ from transformers import AutoTokenizer
4
+ from unsloth import FastLanguageModel
5
+ from trl import GPPOTrainer, PPOConfig
6
+ import json
7
+ import re
8
+
# == 1. Constants ==
# Hard cap on probe/submit turns per episode before the loop gives up.
MAX_STEPS_PER_EPISODE = 15
# Base URL of the locally running OpenSleuth FastAPI environment (server.py).
ENV_URL = "http://127.0.0.1:8000"
# 4-bit quantized Qwen2 0.5B instruct checkpoint loaded via unsloth.
MODEL_NAME = "unsloth/qwen2-0.5b-instruct-sft-bnb-4bit"
13
+
# == 2. Prompt Engineering ==
def build_prompt(probe_history):
    """
    Creates the prompt for the LLM based on the probe history.

    Args:
        probe_history: list of (input, output) pairs from prior probes;
            output may be a value or an error-traceback string.

    Returns:
        A single prompt string instructing the model to PROBE or SUBMIT.
    """
    # BUGFIX: the source used double-escaped "\\n", which embeds a literal
    # backslash-n in the prompt instead of a real newline.
    prompt = "You are a reverse-engineering AI. Your goal is to understand a hidden black-box function by probing it and then writing a Python replica.\n\n"
    prompt += "== Probe History ==\n"
    if not probe_history:
        prompt += "No probes yet. Your first action should be a probe.\n"
    else:
        for i, (inp, out) in enumerate(probe_history):
            prompt += f"{i+1}. IN: {inp} -> OUT: {out}\n"

    prompt += "\n== Your Action ==\n"
    prompt += "You can either PROBE or SUBMIT.\n"
    prompt += "To probe, respond with: PROBE(input)\n"
    prompt += "To submit your code, respond with: SUBMIT\n```python\n[your code here]\n```\n"
    prompt += "Your decision: "
    return prompt
33
+
# == 3. Action Parsing ==
def parse_action_from_response(response_text):
    """
    Parses the model's text response to determine the action.

    Returns a dict matching the environment's ProbeAction/SubmitAction
    schema; falls back to a benign probe of "1" when nothing parses.
    """
    # BUGFIX: the regexes used doubled backslashes inside raw strings
    # (r"PROBE\\(...") which match a literal backslash character and so
    # never fire on real model output.
    probe_match = re.search(r"PROBE\((.*)\)", response_text)
    if probe_match:
        inp = probe_match.group(1).strip()
        # NOTE(review): the probe input stays a string even for numeric
        # targets -- confirm the environment is meant to receive str here.
        return {"action_type": "probe", "input": inp}

    submit_match = re.search(r"SUBMIT\s*```python\n(.*)```", response_text, re.DOTALL)
    if submit_match:
        code = submit_match.group(1).strip()
        return {"action_type": "submit", "code": code}

    # Default to a probe if parsing fails
    return {"action_type": "probe", "input": "1"}
51
+
52
+
# == 4. Main Training Script ==
def main():
    """Run a PPO-style RL loop against the OpenSleuth HTTP environment.

    Requires server.py to be running at ENV_URL; episodes alternate model
    generation, env steps over HTTP, and a trainer update per episode.
    """
    # --- Initialize Model ---
    # 4-bit quantized base model; dtype=None lets unsloth choose.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = MODEL_NAME,
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,
    )
    # LoRA configuration
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = True,
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    # --- Initialize GPPO Trainer ---
    # Note: GPPO is a new trainer in TRL and might require specific config.
    # This is a placeholder configuration.
    # NOTE(review): TRL's documented class is PPOTrainer; confirm that
    # GPPOTrainer exists in the pinned TRL version, otherwise this import
    # and constructor will fail.
    ppo_config = PPOConfig(
        batch_size=4,
        mini_batch_size=1,
        learning_rate=1.41e-5,
        adap_kl_ctrl=False,
        log_with="tensorboard",
        project_kwargs={"logging_dir": "./logs"}
    )

    # We need a dataset for the trainer, even if it's just a dummy one for initialization
    # In a real RL loop, we provide the experiences directly to the `step` method.
    dummy_dataset = [{"query": "dummy"}]
    gppo_trainer = GPPOTrainer(
        config=ppo_config,
        model=model,
        tokenizer=tokenizer,
        dataset=dummy_dataset,
    )

    # --- Training Loop ---
    for episode in range(10): # Run for 10 episodes for demonstration
        print(f"--- Episode {episode+1} ---")

        # Reset environment
        try:
            resp = requests.post(f"{ENV_URL}/reset", json={"target_name": "fibonacci"})
            obs = resp.json()
        except requests.exceptions.ConnectionError as e:
            # Without the env server there is nothing to train against.
            print(f"ERROR: Could not connect to environment at {ENV_URL}. Is it running?")
            print("Please run 'uvicorn server:app --host 0.0.0.0 --port 8000' in the 'opensleuth_env' directory.")
            return

        # Per-episode experience buffers consumed by the trainer step below.
        queries, responses, rewards = [], [], []

        for step in range(MAX_STEPS_PER_EPISODE):
            # Build prompt and generate action
            prompt = build_prompt(obs.get("probe_history", []))
            query_tensor = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

            # Generate a response from the model
            generation_kwargs = {"min_new_tokens": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True, "pad_token_id": tokenizer.eos_token_id, "max_new_tokens": 150}
            response_tensor = gppo_trainer.generate(query_tensor, **generation_kwargs)
            response_text = tokenizer.decode(response_tensor[0])

            # Parse action and execute in environment
            action = parse_action_from_response(response_text)
            step_resp = requests.post(f"{ENV_URL}/step", json=action)
            step_data = step_resp.json()

            reward = torch.tensor(step_data["reward"], dtype=torch.float32)
            obs = step_data["observation"]
            done = step_data["done"]

            # Store experience
            queries.append(query_tensor.squeeze())
            responses.append(response_tensor.squeeze())
            rewards.append(reward)

            print(f"Step {step+1}: Action: {action['action_type']}, Reward: {reward.item():.2f}")

            # A submit (or invalid action) terminates the episode early.
            if done:
                break

        # --- Perform PPO Step ---
        # This is a simplified view. The actual step requires careful handling of tensors.
        # The `queries`, `responses`, `rewards` lists need to be formatted correctly.
        try:
            stats = gppo_trainer.step(queries, responses, rewards)
            gppo_trainer.log_stats(stats, {}, rewards)
            print(f" PPO Step done. Mean reward: {stats['ppo/returns/mean']:.2f}")
        except Exception as e:
            # Best-effort: keep training across episodes even if one update fails.
            print(f"ERROR during trainer.step: {e}")
            print(" Skipping PPO step for this episode. This might happen if all trajectories are truncated.")


if __name__ == "__main__":
    # Ensure the server is running before starting training.
    # We will run the server in the background from the CLI.
    main()
verifier_log.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ \n--- Verifier Fuzzing ---\nInput: 88, Target: 1100087778366101931, Submitted: 1100087778366101931\nInput: 24, Target: 46368, Submitted: 46368\nInput: 14, Target: 377, Submitted: 377\nInput: 67, Target: 44945570212853, Submitted: 44945570212853\nInput: 35, Target: 9227465, Submitted: 9227465\nInput: 82, Target: 61305790721611591, Submitted: 61305790721611591\nInput: 82, Target: 61305790721611591, Submitted: 61305790721611591\nInput: 25, Target: 75025, Submitted: 75025\nInput: 1, Target: 1, Submitted: 1\nInput: 86, Target: 420196140727489673, Submitted: 420196140727489673\n--- End Verifier Fuzzing ---\nExecution Reward: 100.0, Complexity Penalty: 1.6094379124341003\n