Spaces:

anugrah55
/

opensleuth-env-gemini-cli

Paused

App Files Community

anugrah55 committed 13 days ago

Commit

63bb50c

verified ·

1 Parent(s): 7866ce6

Remove obsolete artifacts and trainer code from env Space

Browse files

Files changed (11) hide show

.DS_Store +0 -0
__pycache__/server.cpython-313.pyc +0 -0
opensleuth_env/.DS_Store +0 -0
opensleuth_env/__pycache__/__init__.cpython-313.pyc +0 -0
opensleuth_env/__pycache__/black_box.cpython-313.pyc +0 -0
opensleuth_env/__pycache__/env.cpython-313.pyc +0 -0
opensleuth_env/__pycache__/models.cpython-313.pyc +0 -0
opensleuth_env/__pycache__/verifier.cpython-313.pyc +0 -0
test_client.py +0 -29
train.py +0 -157
verifier_log.txt +0 -1

.DS_Store DELETED Viewed

Binary file (8.2 kB)

__pycache__/server.cpython-313.pyc DELETED Viewed

Binary file (1.68 kB)

opensleuth_env/.DS_Store DELETED Viewed

Binary file (8.2 kB)

opensleuth_env/__pycache__/__init__.cpython-313.pyc DELETED Viewed

Binary file (159 Bytes)

opensleuth_env/__pycache__/black_box.cpython-313.pyc DELETED Viewed

Binary file (1.28 kB)

opensleuth_env/__pycache__/env.cpython-313.pyc DELETED Viewed

Binary file (5.11 kB)

opensleuth_env/__pycache__/models.cpython-313.pyc DELETED Viewed

Binary file (1.89 kB)

opensleuth_env/__pycache__/verifier.cpython-313.pyc DELETED Viewed

Binary file (4.43 kB)

test_client.py DELETED Viewed

@@ -1,29 +0,0 @@
-import requests
-import json
-# Manual smoke test: reset the local environment server to the "fibonacci"
-# target, submit a candidate implementation, and print the server's reply.
-# The exact code to be submitted, without shell escaping issues
-code_to_submit = """
-def fibonacci(n: int) -> int:
-    if not isinstance(n, int) or n <= 0 or n > 90:
-        raise ValueError("Input must be a positive integer less than or equal to 90.")
-    if n == 1:
-        return 1
-    a, b = 0, 1
-    for _ in range(n - 1):
-        a, b = b, a + b
-    return b
-"""
-action = {
-    "action_type": "submit",
-    "code": code_to_submit
-}
-# requests has no default timeout; without one a hung server blocks forever.
-REQUEST_TIMEOUT_S = 30
-# Reset the environment first, and stop if the reset was rejected: a step
-# sent to an environment that was never reset would give a misleading result.
-reset_response = requests.post(
-    "http://127.0.0.1:8000/reset",
-    json={"target_name": "fibonacci"},
-    timeout=REQUEST_TIMEOUT_S,
-)
-reset_response.raise_for_status()
-# Now send the step action
-response = requests.post(
-    "http://127.0.0.1:8000/step",
-    json=action,
-    timeout=REQUEST_TIMEOUT_S,
-)
-print(response.status_code)
-print(response.json())

train.py DELETED Viewed

@@ -1,157 +0,0 @@
-import torch
-import requests
-from transformers import AutoTokenizer
-from unsloth import FastLanguageModel
-from trl import GPPOTrainer, PPOConfig
-import json
-import re
-# == 1. Constants ==
-# Upper bound on the number of environment steps main() takes in one episode.
-MAX_STEPS_PER_EPISODE = 15
-# Base URL of the environment HTTP server; main() posts to its /reset and /step routes.
-ENV_URL = "http://127.0.0.1:8000"
-# Model identifier passed to FastLanguageModel.from_pretrained in main().
-MODEL_NAME = "unsloth/qwen2-0.5b-instruct-sft-bnb-4bit"
-# == 2. Prompt Engineering ==
-def build_prompt(probe_history):
-    """Build the LLM prompt: task description, probes so far, and action format.
-
-    Args:
-        probe_history: Sequence of ``(input, output)`` pairs from earlier
-            probes. An empty (or otherwise falsy) value renders a hint that
-            no probes have been made yet.
-
-    Returns:
-        The prompt text, with one item per line, ending in "Your decision: ".
-    """
-    # Lines are joined with a real newline. The earlier version appended the
-    # two-character sequence backslash + "n" to every line, so the model saw
-    # a single long line littered with literal "\n" markers.
-    lines = [
-        "You are a reverse-engineering AI. Your goal is to understand a hidden black-box function by probing it and then writing a Python replica.",
-        "",
-        "== Probe History ==",
-    ]
-    if not probe_history:
-        lines.append("No probes yet. Your first action should be a probe.")
-    else:
-        lines.extend(
-            f"{number}. IN: {inp} -> OUT: {out}"
-            for number, (inp, out) in enumerate(probe_history, start=1)
-        )
-    lines += [
-        "",
-        "== Your Action ==",
-        "You can either PROBE or SUBMIT.",
-        "To probe, respond with: PROBE(input)",
-        "To submit your code, respond with: SUBMIT",
-        "```python",
-        "[your code here]",
-        "```",
-        "Your decision: ",
-    ]
-    return "\n".join(lines)
-# == 3. Action Parsing ==
-def parse_action_from_response(response_text):
-    """Turn the model's free-text reply into an environment action dict.
-
-    Args:
-        response_text: Decoded text produced by the model.
-
-    Returns:
-        ``{"action_type": "probe", "input": <str>}`` when the text contains
-        ``PROBE(...)``; otherwise ``{"action_type": "submit", "code": <str>}``
-        when it contains ``SUBMIT`` followed by a fenced python block;
-        otherwise a default probe with input ``"1"``.
-    """
-    # In a raw string a single backslash already escapes the parenthesis.
-    # The earlier doubled form matched a literal backslash character instead,
-    # so replies such as "PROBE(5)" never parsed and every reply fell through
-    # to the default probe.
-    probe_match = re.search(r"PROBE\((.*)\)", response_text)
-    if probe_match:
-        return {"action_type": "probe", "input": probe_match.group(1).strip()}
-    # DOTALL lets the captured code span multiple lines.
-    submit_match = re.search(
-        r"SUBMIT\s*```python\n(.*)```", response_text, re.DOTALL
-    )
-    if submit_match:
-        return {"action_type": "submit", "code": submit_match.group(1).strip()}
-    # Default to a probe if parsing fails
-    return {"action_type": "probe", "input": "1"}
-# == 4. Main Training Script ==
-def main():
-    """Run a short demonstration RL loop against the environment HTTP server.
-
-    Loads MODEL_NAME in 4-bit through FastLanguageModel, attaches LoRA
-    adapters, builds a GPPOTrainer, then plays 10 episodes of at most
-    MAX_STEPS_PER_EPISODE steps each against the "fibonacci" target. After
-    every episode the collected query/response/reward lists are handed to
-    the trainer's step(). Returns None; returns early if the environment
-    server cannot be reached when resetting.
-    """
-    # --- Initialize Model ---
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name = MODEL_NAME,
-        max_seq_length = 2048,
-        dtype = None,
-        load_in_4bit = True,
-    )
-    # LoRA configuration
-    model = FastLanguageModel.get_peft_model(
-        model,
-        r = 16,
-        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
-        lora_alpha = 16,
-        lora_dropout = 0,
-        bias = "none",
-        use_gradient_checkpointing = True,
-        random_state = 3407,
-        use_rslora = False,
-        loftq_config = None,
-    )
-    # --- Initialize GPPO Trainer ---
-    # Note: GPPO is a new trainer in TRL and might require specific config.
-    # This is a placeholder configuration.
-    # NOTE(review): confirm that the installed TRL version actually exports
-    # GPPOTrainer and that PPOConfig accepts these keyword arguments.
-    ppo_config = PPOConfig(
-        batch_size=4,
-        mini_batch_size=1,
-        learning_rate=1.41e-5,
-        adap_kl_ctrl=False,
-        log_with="tensorboard",
-        project_kwargs={"logging_dir": "./logs"}
-    )
-    # We need a dataset for the trainer, even if it's just a dummy one for initialization
-    # In a real RL loop, we provide the experiences directly to the `step` method.
-    dummy_dataset = [{"query": "dummy"}]
-    gppo_trainer = GPPOTrainer(
-        config=ppo_config,
-        model=model,
-        tokenizer=tokenizer,
-        dataset=dummy_dataset,
-    )
-    # --- Training Loop ---
-    for episode in range(10): # Run for 10 episodes for demonstration
-        print(f"--- Episode {episode+1} ---")
-        # Reset environment
-        # Only a connection failure is handled here; no timeout is passed, so
-        # a server that accepts the connection but never answers blocks forever.
-        try:
-            resp = requests.post(f"{ENV_URL}/reset", json={"target_name": "fibonacci"})
-            obs = resp.json()
-        except requests.exceptions.ConnectionError as e:
-            print(f"ERROR: Could not connect to environment at {ENV_URL}. Is it running?")
-            print("Please run 'uvicorn server:app --host 0.0.0.0 --port 8000' in the 'opensleuth_env' directory.")
-            return
-        # Per-episode experience buffers, passed to the trainer after the episode.
-        queries, responses, rewards = [], [], []
-        for step in range(MAX_STEPS_PER_EPISODE):
-            # Build prompt and generate action
-            prompt = build_prompt(obs.get("probe_history", []))
-            query_tensor = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
-            # Generate a response from the model
-            generation_kwargs = {"min_new_tokens": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True, "pad_token_id": tokenizer.eos_token_id, "max_new_tokens": 150}
-            response_tensor = gppo_trainer.generate(query_tensor, **generation_kwargs)
-            # NOTE(review): if generate() returns prompt + completion, the decoded
-            # text also contains the prompt's own "PROBE(input)" example, which the
-            # parser could pick up instead of the model's answer. Confirm.
-            response_text = tokenizer.decode(response_tensor[0])
-            # Parse action and execute in environment
-            action = parse_action_from_response(response_text)
-            # Unlike the reset call above, this request is not wrapped in a
-            # try/except, so a connection error here propagates out of main().
-            step_resp = requests.post(f"{ENV_URL}/step", json=action)
-            step_data = step_resp.json()
-            reward = torch.tensor(step_data["reward"], dtype=torch.float32)
-            obs = step_data["observation"]
-            done = step_data["done"]
-            # Store experience
-            queries.append(query_tensor.squeeze())
-            responses.append(response_tensor.squeeze())
-            rewards.append(reward)
-            print(f"Step {step+1}: Action: {action['action_type']}, Reward: {reward.item():.2f}")
-            if done:
-                break
-        # --- Perform PPO Step ---
-        # This is a simplified view. The actual step requires careful handling of tensors.
-        # The `queries`, `responses`, `rewards` lists need to be formatted correctly.
-        # Any failure in the update is printed and the episode's update is skipped,
-        # so the loop continues with the next episode.
-        try:
-            stats = gppo_trainer.step(queries, responses, rewards)
-            gppo_trainer.log_stats(stats, {}, rewards)
-            print(f"  PPO Step done. Mean reward: {stats['ppo/returns/mean']:.2f}")
-        except Exception as e:
-            print(f"ERROR during trainer.step: {e}")
-            print("  Skipping PPO step for this episode. This might happen if all trajectories are truncated.")
-if __name__ == "__main__":
-    # The environment server must already be running before training starts;
-    # it is launched separately, in the background, from the CLI.
-    main()

verifier_log.txt DELETED Viewed

@@ -1 +0,0 @@

- \n--- Verifier Fuzzing ---\nInput: 88, Target: 1100087778366101931, Submitted: 1100087778366101931\nInput: 24, Target: 46368, Submitted: 46368\nInput: 14, Target: 377, Submitted: 377\nInput: 67, Target: 44945570212853, Submitted: 44945570212853\nInput: 35, Target: 9227465, Submitted: 9227465\nInput: 82, Target: 61305790721611591, Submitted: 61305790721611591\nInput: 82, Target: 61305790721611591, Submitted: 61305790721611591\nInput: 25, Target: 75025, Submitted: 75025\nInput: 1, Target: 1, Submitted: 1\nInput: 86, Target: 420196140727489673, Submitted: 420196140727489673\n--- End Verifier Fuzzing ---\nExecution Reward: 100.0, Complexity Penalty: 1.6094379124341003\n