Spaces:

vishaldhakad
/

SecureCodeEnv

Sleeping

App Files Files Community

SecureCodeEnv / openenv.yaml

vishaldhakad

frontend adding

7257069 10 days ago

raw

history blame contribute delete

4.5 kB

	# Environment identity: name, manifest version, and a prose summary.
	name: SecureCodeEnv
	# Quoted so the version stays a string rather than being type-inferred.
	version: "2.0.0"
	# Folded scalar (`>`): the indented lines below are joined with spaces into
	# one paragraph; the body must be indented deeper than the `description` key.
	description: >
	  An RL environment for training LLM agents to write production-ready,
	  secure Python code. Agents are graded on correctness, security attack
	  resistance (dynamic adversarial payloads), CWE-based static analysis,
	  performance, and codebase consistency via a novel CodeGraph memory system.
	  No other public OpenEnv environment combines attack simulation + codebase
	  consistency grading. All grading is 100% automated and deterministic.

	# Authorship, Space slug (owner/name), and license identifier.
	author: "Vishal Dhakad"
	hf_space: "vishaldhakad/SecureCodeEnv"
	license: "MIT"

	# Action the agent submits on each step: a text payload made of three
	# string fields.
	action_space:
	  type: text
	  description: Python source code string submitted by the agent
	  fields:
	    # The code to be graded.
	    - name: code
	      type: string
	      description: The complete Python function(s) to be graded
	    # Logical path used for CodeGraph tracking.
	    - name: filename
	      type: string
	      description: Logical filename for CodeGraph tracking (e.g. src/auth/validator.py)
	    # Identifier returned from /reset.
	    - name: session_id
	      type: string
	      description: Session ID returned from /reset

	# Structured observation: six named fields.
	observation_space:
	  type: structured
	  fields:
	    - name: total_reward
	      type: float
	      range: [0.0, 1.0]
	      description: Weighted final score across all 7 dimensions
	    # NOTE(review): this description lists `attack_resist` and `consistency`,
	    # while the `reward.dimensions` entries in this file are named
	    # `attack_resistance` and `codegraph_consistency` — confirm which
	    # spelling the scores dict actually uses.
	    - name: scores
	      type: dict
	      description: >
	        Per-dimension scores: correctness, attack_resist, static_security,
	        consistency, performance, documentation, code_structure
	    - name: feedback
	      type: dict
	      description: Human-readable feedback string per grading dimension
	    - name: codegraph
	      type: dict
	      description: >
	        Full codebase context including components, detected conventions,
	        dependency list, and natural-language context prompt for the agent
	    # The 0.90 figure here matches `runtime.done_reward_threshold`.
	    - name: done
	      type: bool
	      description: True if episode is complete (reward >= 0.90 or max steps reached)
	    - name: step_count
	      type: int
	      description: Current step number within the episode

	# Reward definition: seven weighted dimensions, overall range [0.0, 1.0].
	# The weights below sum to 1.00
	# (0.30 + 0.20 + 0.15 + 0.15 + 0.10 + 0.05 + 0.05).
	reward:
	  type: multi_dimensional
	  range: [0.0, 1.0]
	  dimensions:
	    - name: correctness
	      weight: 0.30
	      description: Fraction of test cases passed (including edge cases)
	    - name: attack_resistance
	      weight: 0.20
	      description: Fraction of randomized adversarial payloads blocked
	    - name: static_security
	      weight: 0.15
	      description: bandit + AST security linter score (CWE-mapped)
	    - name: codegraph_consistency
	      weight: 0.15
	      description: Adherence to conventions from existing codebase components
	    - name: performance
	      weight: 0.10
	      description: Relative efficiency vs naive/optimal baselines (timeit)
	    - name: documentation
	      weight: 0.05
	      description: Docstring + type hint coverage across all functions
	    - name: code_structure
	      weight: 0.05
	      description: Clean code checks (no bare print, no bare except, etc.)

	# Task catalogue: nine tasks, three each at easy / medium / hard difficulty.
	# Each entry carries an id, a difficulty label, a list of CWE identifiers,
	# and a one-line description.
	tasks:
	  - id: easy_password_validator
	    difficulty: easy
	    cwe: [CWE-916, CWE-521]
	    description: Validate password strength and hash with bcrypt (not MD5)

	  - id: easy_input_sanitizer
	    difficulty: easy
	    cwe: [CWE-20, CWE-116]
	    description: Sanitize HTML (XSS prevention) and filenames

	  - id: easy_token_generator
	    difficulty: easy
	    cwe: [CWE-338, CWE-330]
	    description: Generate cryptographically secure tokens using secrets module

	  - id: medium_sql_query_builder
	    difficulty: medium
	    cwe: [CWE-89, CWE-20]
	    description: Build parameterized SQL queries — never string-format user input

	  - id: medium_file_path_handler
	    difficulty: medium
	    cwe: [CWE-22, CWE-20]
	    description: Resolve file paths safely — block path traversal attacks

	  - id: medium_rate_limiter
	    difficulty: medium
	    cwe: [CWE-770, CWE-400]
	    description: Thread-safe sliding window rate limiter

	  - id: hard_file_upload_handler
	    difficulty: hard
	    cwe: [CWE-22, CWE-434]
	    description: Validate uploads — block traversal filenames, executable extensions, MIME spoofing

	  - id: hard_jwt_validator
	    difficulty: hard
	    cwe: [CWE-347, CWE-613]
	    description: Validate JWTs — enforce HS256, block none-alg attack, check expiry

	  - id: hard_auth_middleware
	    difficulty: hard
	    cwe: [CWE-287, CWE-352]
	    description: CSRF protection and Bearer auth using hmac.compare_digest (timing-safe)

	# Episode limits and resource settings.
	runtime:
	  max_steps_per_episode: 5
	  # Same 0.90 value quoted in the observation `done` field description.
	  done_reward_threshold: 0.90
	  max_inference_time_minutes: 20
	  min_vcpu: 2
	  min_memory_gb: 8
	  port: 7860

	# HTTP endpoints, each written as "<METHOD> <path>".
	endpoints:
	  health: GET /health
	  reset: POST /reset
	  step: POST /step
	  state: GET /state
	  docs: GET /docs