Spaces:
Running
Running
File size: 18,191 Bytes
2fd8593 af9bb6b 2fd8593 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | import json
from tqdm import tqdm
import argparse
import os
import sys
from utils import print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost
from rate_limiter import RateLimiter, estimate_tokens
from llm_provider import get_provider, get_default_model
# ---------------------------------------------------------------------------
# CLI arguments.  The script historically accepted paper-specific flags
# (--paper_name / --gpt_version); newer generic flags (--content_name /
# --model) are preferred, with the old ones kept for backward compatibility.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# Support both paper and blog inputs
parser.add_argument('--paper_name', type=str, help='Name of the paper (deprecated, use --content_name)')
parser.add_argument('--blog_name', type=str, help='Name of the blog')
parser.add_argument('--content_name', type=str, help='Name of the content (paper or blog)')
parser.add_argument('--gpt_version', type=str, help='Model version (deprecated, use --model)')
parser.add_argument('--model', type=str, help='Model name (e.g., gpt-4o-mini, gemini-2.0-flash)')
parser.add_argument('--provider', type=str, default='gemini', choices=['openai', 'gemini', 'gemma'], help='LLM provider to use')
# Format flags: one per content kind.  NOTE(review): both have non-empty
# defaults, so the generic --content_format below is effectively never used
# (see the `or` chains after parsing) — confirm before removing it.
parser.add_argument('--paper_format', type=str, default="JSON", choices=["JSON", "LaTeX"], help='Format for papers')
parser.add_argument('--blog_format', type=str, default="JSON", choices=["JSON", "Markdown", "HTML"], help='Format for blogs')
parser.add_argument('--content_format', type=str, default="JSON", help='Format of the content')
# Input paths: exactly one of these is expected, matching the chosen format.
parser.add_argument('--pdf_json_path', type=str, help='Path to paper JSON file')
parser.add_argument('--pdf_latex_path', type=str, help='Path to paper LaTeX file')
parser.add_argument('--blog_json_path', type=str, help='Path to blog JSON file')
parser.add_argument('--blog_md_path', type=str, help='Path to blog Markdown file')
parser.add_argument('--blog_html_path', type=str, help='Path to blog HTML file')
parser.add_argument('--content_type', type=str, default="paper", choices=["paper", "blog"], help='Type of content to process')
parser.add_argument('--output_dir', type=str, default="")
args = parser.parse_args()
# ---------------------------------------------------------------------------
# Provider/model selection and content-source resolution.
# ---------------------------------------------------------------------------

# Initialize LLM provider
provider_name = args.provider
llm_provider = get_provider(provider_name)
# Precedence: explicit --model, then deprecated --gpt_version, then the
# provider's default model.
model = args.model or args.gpt_version or get_default_model(provider_name)
print(f"🤖 Using {provider_name.upper()} with model: {model}")

# Determine content type and set variables.  Any blog-specific flag switches
# the whole run into "blog" mode regardless of --content_type.
if args.blog_name or args.blog_json_path or args.blog_md_path or args.blog_html_path:
    content_type = "blog"
    content_name = args.blog_name or args.content_name or "BlogPost"
    # NOTE(review): --blog_format has a non-empty default ("JSON"), so this
    # `or` never falls through to --content_format; kept for compatibility.
    content_format = args.blog_format or args.content_format
    content_path = args.blog_json_path or args.blog_md_path or args.blog_html_path
else:
    content_type = args.content_type
    content_name = args.paper_name or args.content_name or "Paper"
    # Same caveat as above: --paper_format always wins because of its default.
    content_format = args.paper_format or args.content_format
    content_path = args.pdf_json_path or args.pdf_latex_path

gpt_version = args.gpt_version  # retained only for backward compatibility
# Bug fix: the flag's default of "" made os.makedirs('') raise
# FileNotFoundError; fall back to the current directory so every later
# f"{output_dir}/..." path stays valid.
output_dir = args.output_dir or "."
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
# ---------------------------------------------------------------------------
# Load the content to reproduce, according to its declared format.
# JSON is parsed; LaTeX/Markdown/HTML are passed to the LLM as raw text.
# ---------------------------------------------------------------------------
if content_format == "JSON":
    with open(content_path, encoding='utf-8') as f:
        content_data = json.load(f)
elif content_format in ["LaTeX", "Markdown", "HTML"]:
    with open(content_path, encoding='utf-8') as f:
        content_data = f.read()
else:
    print("[ERROR] Invalid format. Please select JSON, LaTeX, Markdown, or HTML.")
    # Bug fix: exit with a non-zero status so shell scripts and callers can
    # detect the failure (the old code exited with status 0 on error).
    sys.exit(1)
# ---------------------------------------------------------------------------
# Stage 1 prompt (plan_msg): ask the model for a reproduction/implementation
# plan.  The blog branch targets tutorial code; the paper branch targets
# experiment reproduction.  Prompt text is runtime data — do not edit.
# ---------------------------------------------------------------------------
if content_type == "blog":
    plan_msg = [
        {'role': "system", "content": f"""You are an expert software engineer and technical content analyst with deep understanding of tutorial implementation and code reproduction.
You will receive a technical blog post in {content_format} format.
Your task is to create a detailed and efficient plan to implement the code, algorithms, or systems described in the blog post.
This plan should align precisely with the blog's tutorial steps, code examples, and technical specifications.
Instructions:
1. Align with the Blog: Your plan must strictly follow the methods, code examples, configurations, and implementation steps described in the blog.
2. Extract Code Snippets: Identify and organize any existing code snippets from the blog.
3. Fill Gaps: Identify missing implementation details that need to be inferred or completed.
4. Be Clear and Structured: Present the plan in a well-organized and easy-to-follow format, breaking it down into actionable steps.
5. Prioritize Efficiency: Optimize the plan for clarity and practical implementation while ensuring fidelity to the original tutorial.
6. Add Production Features: Plan for error handling, logging, testing, and documentation that may not be in the blog."""},
        # The full blog body is interpolated directly into the user turn.
        {"role": "user",
         "content" : f"""## Blog Post
{content_data}
## Task
1. We want to implement the tutorial/system described in this blog post.
2. The blog may contain partial code snippets that we need to organize and complete.
3. Before writing the final code, please outline a comprehensive plan that covers:
- Key implementation steps from the blog
- Code architecture and structure
- Dependencies and libraries mentioned
- Configuration requirements
- Any code snippets already provided in the blog
- Missing details that need to be inferred or completed
4. The plan should be as **detailed and practical** as possible to help us write production-ready code.
## Requirements
- Extract and organize any existing code snippets from the blog
- Identify gaps in the blog's explanation that need to be filled
- Focus on creating a **working, complete implementation**
- If something is unclear from the blog, mention it explicitly and suggest reasonable defaults
## Instruction
The response should give us a strong roadmap for turning this blog tutorial into production code."""}]
else:
    # Paper branch: plan the reproduction of the paper's experiments.
    plan_msg = [
        {'role': "system", "content": f"""You are an expert researcher and strategic planner with a deep understanding of experimental design and reproducibility in scientific research.
You will receive a research paper in {content_format} format.
Your task is to create a detailed and efficient plan to reproduce the experiments and methodologies described in the paper.
This plan should align precisely with the paper's methodology, experimental setup, and evaluation metrics.
Instructions:
1. Align with the Paper: Your plan must strictly follow the methods, datasets, model configurations, hyperparameters, and experimental setups described in the paper.
2. Be Clear and Structured: Present the plan in a well-organized and easy-to-follow format, breaking it down into actionable steps.
3. Prioritize Efficiency: Optimize the plan for clarity and practical implementation while ensuring fidelity to the original experiments."""},
        # The full paper body is interpolated directly into the user turn.
        {"role": "user",
         "content" : f"""## Paper
{content_data}
## Task
1. We want to reproduce the method described in the attached paper.
2. The authors did not release any official code, so we have to plan our own implementation.
3. Before writing any Python code, please outline a comprehensive plan that covers:
- Key details from the paper's **Methodology**.
- Important aspects of **Experiments**, including dataset requirements, experimental settings, hyperparameters, or evaluation metrics.
4. The plan should be as **detailed and informative** as possible to help us write the final code later.
## Requirements
- You don't need to provide the actual code yet; focus on a **thorough, clear strategy**.
- If something is unclear from the paper, mention it explicitly.
## Instruction
The response should give us a strong roadmap, making it easier to write the code later."""}]
# ---------------------------------------------------------------------------
# Stage 2 prompt (file_list_msg): turn the plan into a concrete software
# design (file list, mermaid class diagram, call flow) returned as JSON
# wrapped in [CONTENT]...[/CONTENT].  The format example below is runtime
# prompt text (including its imperfect JSON) — do not edit it.
# ---------------------------------------------------------------------------
file_list_msg = [
    {"role": "user", "content": """Your goal is to create a concise, usable, and complete software system design for reproducing the paper's method. Use appropriate open-source libraries and keep the overall architecture simple.
Based on the plan for reproducing the paper’s main method, please design a concise, usable, and complete software system.
Keep the architecture simple and make effective use of open-source libraries.
-----
## Format Example
[CONTENT]
{
"Implementation approach": "We will ... ,
"File list": [
"main.py",
"dataset_loader.py",
"model.py",
"trainer.py",
"evaluation.py"
],
"Data structures and interfaces": "\nclassDiagram\n class Main {\n +__init__()\n +run_experiment()\n }\n class DatasetLoader {\n +__init__(config: dict)\n +load_data() -> Any\n }\n class Model {\n +__init__(params: dict)\n +forward(x: Tensor) -> Tensor\n }\n class Trainer {\n +__init__(model: Model, data: Any)\n +train() -> None\n }\n class Evaluation {\n +__init__(model: Model, data: Any)\n +evaluate() -> dict\n }\n Main --> DatasetLoader\n Main --> Trainer\n Main --> Evaluation\n Trainer --> Model\n",
"Program call flow": "\nsequenceDiagram\n participant M as Main\n participant DL as DatasetLoader\n participant MD as Model\n participant TR as Trainer\n participant EV as Evaluation\n M->>DL: load_data()\n DL-->>M: return dataset\n M->>MD: initialize model()\n M->>TR: train(model, dataset)\n TR->>MD: forward(x)\n MD-->>TR: predictions\n TR-->>M: training complete\n M->>EV: evaluate(model, dataset)\n EV->>MD: forward(x)\n MD-->>EV: predictions\n EV-->>M: metrics\n",
"Anything UNCLEAR": "Need clarification on the exact dataset format and any specialized hyperparameters."
}
[/CONTENT]
## Nodes: "<node>: <type> # <instruction>"
- Implementation approach: <class 'str'> # Summarize the chosen solution strategy.
- File list: typing.List[str] # Only need relative paths. ALWAYS write a main.py or app.py here.
- Data structures and interfaces: typing.Optional[str] # Use mermaid classDiagram code syntax, including classes, method(__init__ etc.) and functions with type annotations, CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.
- Program call flow: typing.Optional[str] # Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.
- Anything UNCLEAR: <class 'str'> # Mention ambiguities and ask for clarifications.
## Constraint
Format: output wrapped inside [CONTENT][/CONTENT] like the format example, nothing else.
## Action
Follow the instructions for the nodes, generate the output, and ensure it follows the format example."""}
]
# ---------------------------------------------------------------------------
# Stage 3 prompt (task_list_msg): break the design into dependency-ordered
# tasks, required packages, and per-file logic analysis, again returned as
# JSON wrapped in [CONTENT]...[/CONTENT].  Prompt text is runtime data —
# do not edit it.
# ---------------------------------------------------------------------------
task_list_msg = [
    {'role': 'user', 'content': """Your goal is break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies.
You will break down tasks, analyze dependencies.
You outline a clear PRD/technical design for reproducing the paper’s method and experiments.
Now, let's break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies.
The Logic Analysis should not only consider the dependencies between files but also provide detailed descriptions to assist in writing the code needed to reproduce the paper.
-----
## Format Example
[CONTENT]
{
"Required packages": [
"numpy==1.21.0",
"torch==1.9.0"
],
"Required Other language third-party packages": [
"No third-party dependencies required"
],
"Logic Analysis": [
[
"data_preprocessing.py",
"DataPreprocessing class ........"
],
[
"trainer.py",
"Trainer ....... "
],
[
"dataset_loader.py",
"Handles loading and ........"
],
[
"model.py",
"Defines the model ......."
],
[
"evaluation.py",
"Evaluation class ........ "
],
[
"main.py",
"Entry point ......."
]
],
"Task list": [
"dataset_loader.py",
"model.py",
"trainer.py",
"evaluation.py",
"main.py"
],
"Full API spec": "openapi: 3.0.0 ...",
"Shared Knowledge": "Both data_preprocessing.py and trainer.py share ........",
"Anything UNCLEAR": "Clarification needed on recommended hardware configuration for large-scale experiments."
}
[/CONTENT]
## Nodes: "<node>: <type> # <instruction>"
- Required packages: typing.Optional[typing.List[str]] # Provide required third-party packages in requirements.txt format.(e.g., 'numpy==1.21.0').
- Required Other language third-party packages: typing.List[str] # List down packages required for non-Python languages. If none, specify "No third-party dependencies required".
- Logic Analysis: typing.List[typing.List[str]] # Provide a list of files with the classes/methods/functions to be implemented, including dependency analysis and imports. Include as much detailed description as possible.
- Task list: typing.List[str] # Break down the tasks into a list of filenames, prioritized based on dependency order. The task list must include the previously generated file list.
- Full API spec: <class 'str'> # Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend. If front-end and back-end communication is not required, leave it blank.
- Shared Knowledge: <class 'str'> # Detail any shared knowledge, like common utility functions or configuration variables.
- Anything UNCLEAR: <class 'str'> # Mention any unresolved questions or clarifications needed from the paper or project scope.
## Constraint
Format: output wrapped inside [CONTENT][/CONTENT] like the format example, nothing else.
## Action
Follow the node instructions above, generate your output accordingly, and ensure it follows the given format example."""}]
# config
# ---------------------------------------------------------------------------
# Stage 4 prompt (config_msg): extract training hyperparameters from the
# paper and emit them as a fenced `config.yaml` block.  Prompt text is
# runtime data — do not edit it.
# ---------------------------------------------------------------------------
config_msg = [
    {'role': 'user', 'content': """You write elegant, modular, and maintainable code. Adhere to Google-style guidelines.
Based on the paper, plan, design specified previously, follow the "Format Example" and generate the code.
Extract the training details from the above paper (e.g., learning rate, batch size, epochs, etc.), follow the "Format example" and generate the code.
DO NOT FABRICATE DETAILS — only use what the paper provides.
You must write `config.yaml`.
ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Your output format must follow the example below exactly.
-----
# Format Example
## Code: config.yaml
```yaml
## config.yaml
training:
learning_rate: ...
batch_size: ...
epochs: ...
...
```
-----
## Code: config.yaml
"""
     }]
def api_call(msg, model_name):
    """Send *msg* to the configured LLM provider and return the raw completion.

    Args:
        msg: Chat-style message list ({'role': ..., 'content': ...} dicts).
        model_name: Model identifier passed through to the provider.

    Returns:
        The provider's completion object, unmodified.
    """
    kwargs = {"messages": msg, "model": model_name}
    # OpenAI's o3-mini accepts an explicit reasoning-effort setting; all
    # other provider/model combinations use the provider defaults.
    if "o3-mini" in model_name and provider_name == 'openai':
        kwargs["reasoning_effort"] = "high"
    return llm_provider.create_completion(**kwargs)
# Accumulators for the four planning stages: raw completion payloads and the
# running multi-turn conversation sent to the LLM.
responses = []
trajectories = []
# Running USD cost across all stages (updated by print_log_cost).
total_accumulated_cost = 0
# Initialize rate limiter to avoid hitting TPM limits
rate_limiter = RateLimiter(max_tokens_per_minute=95000)  # 95K with 5K buffer
print("🛡️ Rate limiter initialized (95K TPM limit)")
# Human-readable labels for the four sequential planning stages, indexed in
# the same order as the prompts below.
_STAGE_LABELS = (
    "[Planning] Overall plan",
    "[Planning] Architecture design",
    "[Planning] Logic design",
    "[Planning] Configuration file generation",
)
for idx, instruction_msg in enumerate([plan_msg, file_list_msg, task_list_msg, config_msg]):
    current_stage = _STAGE_LABELS[idx]
    print(current_stage)
    # Each stage sees the whole conversation so far (multi-turn planning).
    trajectories.extend(instruction_msg)
    # Throttle before sending so we stay under the provider's TPM cap.
    estimated_tokens = estimate_tokens(str(trajectories))
    rate_limiter.wait_if_needed(estimated_tokens)
    completion = api_call(trajectories, model)
    # Normalize the provider-specific completion into an OpenAI-style dict so
    # downstream printing/logging works identically for every provider.
    response_text = llm_provider.get_response_text(completion)
    usage_info = llm_provider.get_usage_info(completion)
    completion_json = {
        'choices': [{'message': {'role': 'assistant', 'content': response_text}}],
        'usage': usage_info,
        'model': model,
    }
    # Print the reply and log its cost, updating the running total.
    print_response(completion_json)
    total_accumulated_cost = print_log_cost(
        completion_json, model, current_stage, output_dir, total_accumulated_cost
    )
    responses.append(completion_json)
    # Feed the assistant's reply back into the running conversation.
    trajectories.append({'role': 'assistant', 'content': response_text})
# save
save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost)
# Print rate limiter statistics
rate_limiter.print_stats()
# Defensive: the directory was already created right after argument parsing.
os.makedirs(output_dir, exist_ok=True)
# Persist the raw per-stage completions and the full conversation history.
with open(f'{output_dir}/planning_response.json', 'w') as f:
    json.dump(responses, f)
with open(f'{output_dir}/planning_trajectories.json', 'w') as f:
    json.dump(trajectories, f)
# Export planning as markdown for easy reference
print("\n📝 Exporting planning to markdown...")
# Section headings for each stage, in prompt order.  Hoisted out of the
# loop below — the old code rebuilt this list on every iteration.
stage_names = ["Overall Plan", "Architecture Design", "Logic Design", "Configuration"]
with open(f'{output_dir}/planning_output.md', 'w', encoding='utf-8') as f:
    f.write(f"# Planning Output for {content_name}\n\n")
    f.write(f"**Model:** {model}\n")
    f.write(f"**Provider:** {provider_name}\n")
    f.write(f"**Content Type:** {content_type}\n\n")
    f.write("---\n\n")
    for idx, response in enumerate(responses):
        stage_name = stage_names[idx] if idx < len(stage_names) else f"Stage {idx+1}"
        f.write(f"## {stage_name}\n\n")
        # completion_json stores the reply in OpenAI chat-completion shape.
        f.write(response['choices'][0]['message']['content'])
        f.write("\n\n---\n\n")
print(f"✅ Planning saved to: {output_dir}/planning_output.md")
|