srishtichugh committed
Commit 2fd8593 · 0 Parent(s)

initial commit

Dockerfile ADDED
FROM python:3.11-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
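The CMD expects a module `main.py` exposing an ASGI application named `app` (which `app.py` below re-exports). A minimal sketch of such a module, assuming the app is built with FastAPI; the framework choice and the health route are illustrative, not taken from this commit:

# main.py (hypothetical sketch; assumes FastAPI is provided by requirements.txt)
from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def health() -> dict:
    """Simple health-check endpoint so the container responds on port 7860."""
    return {"status": "ok"}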
app.py ADDED
from main import app
codes/0_blog_process.py ADDED
import json
import argparse
import requests
from bs4 import BeautifulSoup
import markdown
from urllib.parse import urlparse

def fetch_blog_from_url(url):
    """Fetch blog content from URL"""
    try:
        # Add user agent to avoid 403 errors from sites like Medium
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, timeout=30, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract main content (adjust selectors based on blog platform)
        title = soup.find('h1').get_text() if soup.find('h1') else "Untitled"

        # Common content selectors - try multiple strategies
        content = (soup.find('article') or
                   soup.find('main') or
                   soup.find('div', class_='content') or
                   soup.find('div', class_='post-content') or
                   soup.find('div', class_='entry-content'))

        if content:
            text = content.get_text(separator='\n', strip=True)
            # Also extract code blocks separately
            code_blocks = content.find_all(['pre', 'code'])
            codes = [block.get_text() for block in code_blocks]
        else:
            text = soup.get_text(separator='\n', strip=True)
            codes = []

        return {
            'title': title,
            'url': url,
            'content': text,
            'code_snippets': codes
        }
    except Exception as e:
        print(f"[ERROR] Failed to fetch URL: {e}")
        raise

def process_markdown_file(file_path):
    """Process markdown blog file"""
    with open(file_path, 'r', encoding='utf-8') as f:
        md_content = f.read()

    # Convert markdown to HTML then extract text
    html = markdown.markdown(md_content, extensions=['fenced_code', 'codehilite'])
    soup = BeautifulSoup(html, 'html.parser')

    # Extract title (first h1)
    title = soup.find('h1')
    title_text = title.get_text() if title else "Untitled"

    # Extract code blocks
    code_blocks = soup.find_all(['pre', 'code'])
    codes = [block.get_text() for block in code_blocks]

    return {
        'title': title_text,
        'content': md_content,
        'html': html,
        'code_snippets': codes
    }

def process_html_file(file_path):
    """Process HTML blog file"""
    with open(file_path, 'r', encoding='utf-8') as f:
        html_content = f.read()

    soup = BeautifulSoup(html_content, 'html.parser')
    title = soup.find('h1').get_text() if soup.find('h1') else "Untitled"
    text = soup.get_text(separator='\n', strip=True)

    # Extract code blocks
    code_blocks = soup.find_all(['pre', 'code'])
    codes = [block.get_text() for block in code_blocks]

    return {
        'title': title,
        'content': text,
        'html': html_content,
        'code_snippets': codes
    }

def process_text_file(file_path):
    """Process plain text file"""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    return {
        'title': 'Blog Post',
        'content': content,
        'code_snippets': []
    }

def main(args):
    if args.url:
        print(f"[INFO] Fetching blog from URL: {args.url}")
        blog_data = fetch_blog_from_url(args.url)
    elif args.input_path:
        print(f"[INFO] Processing local file: {args.input_path}")
        if args.input_path.endswith('.md'):
            blog_data = process_markdown_file(args.input_path)
        elif args.input_path.endswith('.html'):
            blog_data = process_html_file(args.input_path)
        else:
            # Plain text
            blog_data = process_text_file(args.input_path)
    else:
        print("[ERROR] Must provide either --url or --input_path")
        return

    # Save as JSON
    with open(args.output_json_path, 'w', encoding='utf-8') as f:
        json.dump(blog_data, f, indent=2, ensure_ascii=False)

    print(f"[SAVED] {args.output_json_path}")
    print(f"[INFO] Title: {blog_data['title']}")
    print(f"[INFO] Content length: {len(blog_data['content'])} characters")
    print(f"[INFO] Code snippets found: {len(blog_data.get('code_snippets', []))}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process blog posts into JSON format for Blog2Code")
    parser.add_argument("--url", type=str, help="Blog URL to fetch")
    parser.add_argument("--input_path", type=str, help="Local blog file path (.md, .html, or .txt)")
    parser.add_argument("--output_json_path", type=str, required=True, help="Output JSON file path")

    args = parser.parse_args()
    main(args)
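The fetcher can also be exercised directly from Python; a quick sketch (the URL is a placeholder, and `importlib` is needed only because the module name starts with a digit):

# Usage sketch; the URL below is illustrative, not from this repo.
from importlib import import_module

blog_process = import_module("0_blog_process")

data = blog_process.fetch_blog_from_url("https://example.com/some-post")
print(data["title"], len(data["code_snippets"]))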
codes/1_1_extract_config.py ADDED
import json
import re
import os
import argparse
import shutil
from utils import extract_planning, content_to_json, format_json_data

parser = argparse.ArgumentParser()

parser.add_argument('--paper_name', type=str)
parser.add_argument('--output_dir', type=str, default="")

args = parser.parse_args()

output_dir = args.output_dir

with open(f'{output_dir}/planning_trajectories.json', encoding='utf8') as f:
    traj = json.load(f)

yaml_raw_content = ""
for turn_idx, turn in enumerate(traj):
    if turn_idx == 8:
        yaml_raw_content = turn['content']

if "</think>" in yaml_raw_content:
    yaml_raw_content = yaml_raw_content.split("</think>")[-1]

match = re.search(r"```yaml\n(.*?)\n```", yaml_raw_content, re.DOTALL)
if match:
    yaml_content = match.group(1)
    with open(f'{output_dir}/planning_config.yaml', 'w', encoding='utf8') as f:
        f.write(yaml_content)
else:
    # Fall back to escaped newlines (when the model emits "\n" literally)
    match2 = re.search(r"```yaml\\n(.*?)\\n```", yaml_raw_content, re.DOTALL)
    if match2:
        yaml_content = match2.group(1)
        with open(f'{output_dir}/planning_config.yaml', 'w', encoding='utf8') as f:
            f.write(yaml_content)
    else:
        print("No YAML content found.")

# ---------------------------------------

artifact_output_dir = f"{output_dir}/planning_artifacts"

os.makedirs(artifact_output_dir, exist_ok=True)

context_lst = extract_planning(f'{output_dir}/planning_trajectories.json')

arch_design = content_to_json(context_lst[1])
logic_design = content_to_json(context_lst[2])

formatted_arch_design = format_json_data(arch_design)
formatted_logic_design = format_json_data(logic_design)

with open(f"{artifact_output_dir}/1.1_overall_plan.txt", "w", encoding="utf-8") as f:
    f.write(context_lst[0])

with open(f"{artifact_output_dir}/1.2_arch_design.txt", "w", encoding="utf-8") as f:
    f.write(formatted_arch_design)

with open(f"{artifact_output_dir}/1.3_logic_design.txt", "w", encoding="utf-8") as f:
    f.write(formatted_logic_design)

shutil.copy(f"{output_dir}/planning_config.yaml", f"{artifact_output_dir}/1.4_config.yaml")
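A quick demonstration of the two extraction patterns used above (the sample strings are invented):

import re

sample = "Here is the config:\n```yaml\ntraining:\n  lr: 0.001\n```\nDone."
m = re.search(r"```yaml\n(.*?)\n```", sample, re.DOTALL)
assert m and m.group(1) == "training:\n  lr: 0.001"

# Escaped-newline variant that the fallback regex is meant to catch:
escaped = r"```yaml\ntraining:\n  lr: 0.001\n```"
m2 = re.search(r"```yaml\\n(.*?)\\n```", escaped, re.DOTALL)
assert m2 and m2.group(1) == r"training:\n  lr: 0.001"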
codes/1_planning.py ADDED
import json
from tqdm import tqdm
import argparse
import os
import sys
from utils import print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost
from rate_limiter import RateLimiter, estimate_tokens
from llm_provider import get_provider, get_default_model

parser = argparse.ArgumentParser()

# Support both paper and blog inputs
parser.add_argument('--paper_name', type=str, help='Name of the paper (deprecated, use --content_name)')
parser.add_argument('--blog_name', type=str, help='Name of the blog')
parser.add_argument('--content_name', type=str, help='Name of the content (paper or blog)')
parser.add_argument('--gpt_version', type=str, help='Model version (deprecated, use --model)')
parser.add_argument('--model', type=str, help='Model name (e.g., gpt-4o-mini, gemini-1.5-flash)')
parser.add_argument('--provider', type=str, default='gemini', choices=['openai', 'gemini', 'gemma'], help='LLM provider to use')
parser.add_argument('--paper_format', type=str, default="JSON", choices=["JSON", "LaTeX"], help='Format for papers')
parser.add_argument('--blog_format', type=str, default="JSON", choices=["JSON", "Markdown", "HTML"], help='Format for blogs')
parser.add_argument('--content_format', type=str, default="JSON", help='Format of the content')
parser.add_argument('--pdf_json_path', type=str, help='Path to paper JSON file')
parser.add_argument('--pdf_latex_path', type=str, help='Path to paper LaTeX file')
parser.add_argument('--blog_json_path', type=str, help='Path to blog JSON file')
parser.add_argument('--blog_md_path', type=str, help='Path to blog Markdown file')
parser.add_argument('--blog_html_path', type=str, help='Path to blog HTML file')
parser.add_argument('--content_type', type=str, default="paper", choices=["paper", "blog"], help='Type of content to process')
parser.add_argument('--output_dir', type=str, default="")

args = parser.parse_args()

# Initialize LLM provider
provider_name = args.provider
llm_provider = get_provider(provider_name)
model = args.model or args.gpt_version or get_default_model(provider_name)

print(f"🤖 Using {provider_name.upper()} with model: {model}")

# Determine content type and set variables
if args.blog_name or args.blog_json_path or args.blog_md_path or args.blog_html_path:
    content_type = "blog"
    content_name = args.blog_name or args.content_name or "BlogPost"
    content_format = args.blog_format or args.content_format
    content_path = args.blog_json_path or args.blog_md_path or args.blog_html_path
else:
    content_type = args.content_type
    content_name = args.paper_name or args.content_name or "Paper"
    content_format = args.paper_format or args.content_format
    content_path = args.pdf_json_path or args.pdf_latex_path

gpt_version = args.gpt_version
output_dir = args.output_dir

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load content based on format
if content_format in ["JSON"]:
    with open(f'{content_path}') as f:
        content_data = json.load(f)
elif content_format in ["LaTeX", "Markdown", "HTML"]:
    with open(f'{content_path}') as f:
        content_data = f.read()
else:
    print("[ERROR] Invalid format. Please select JSON, LaTeX, Markdown, or HTML.")
    sys.exit(1)

if content_type == "blog":
    plan_msg = [
        {'role': "system", "content": f"""You are an expert software engineer and technical content analyst with deep understanding of tutorial implementation and code reproduction.
You will receive a technical blog post in {content_format} format.
Your task is to create a detailed and efficient plan to implement the code, algorithms, or systems described in the blog post.
This plan should align precisely with the blog's tutorial steps, code examples, and technical specifications.

Instructions:

1. Align with the Blog: Your plan must strictly follow the methods, code examples, configurations, and implementation steps described in the blog.
2. Extract Code Snippets: Identify and organize any existing code snippets from the blog.
3. Fill Gaps: Identify missing implementation details that need to be inferred or completed.
4. Be Clear and Structured: Present the plan in a well-organized and easy-to-follow format, breaking it down into actionable steps.
5. Prioritize Efficiency: Optimize the plan for clarity and practical implementation while ensuring fidelity to the original tutorial.
6. Add Production Features: Plan for error handling, logging, testing, and documentation that may not be in the blog."""},
        {"role": "user",
         "content": f"""## Blog Post
{content_data}

## Task
1. We want to implement the tutorial/system described in this blog post.
2. The blog may contain partial code snippets that we need to organize and complete.
3. Before writing the final code, please outline a comprehensive plan that covers:
   - Key implementation steps from the blog
   - Code architecture and structure
   - Dependencies and libraries mentioned
   - Configuration requirements
   - Any code snippets already provided in the blog
   - Missing details that need to be inferred or completed
4. The plan should be as **detailed and practical** as possible to help us write production-ready code.

## Requirements
- Extract and organize any existing code snippets from the blog
- Identify gaps in the blog's explanation that need to be filled
- Focus on creating a **working, complete implementation**
- If something is unclear from the blog, mention it explicitly and suggest reasonable defaults

## Instruction
The response should give us a strong roadmap for turning this blog tutorial into production code."""}]
else:
    plan_msg = [
        {'role': "system", "content": f"""You are an expert researcher and strategic planner with a deep understanding of experimental design and reproducibility in scientific research.
You will receive a research paper in {content_format} format.
Your task is to create a detailed and efficient plan to reproduce the experiments and methodologies described in the paper.
This plan should align precisely with the paper's methodology, experimental setup, and evaluation metrics.

Instructions:

1. Align with the Paper: Your plan must strictly follow the methods, datasets, model configurations, hyperparameters, and experimental setups described in the paper.
2. Be Clear and Structured: Present the plan in a well-organized and easy-to-follow format, breaking it down into actionable steps.
3. Prioritize Efficiency: Optimize the plan for clarity and practical implementation while ensuring fidelity to the original experiments."""},
        {"role": "user",
         "content": f"""## Paper
{content_data}

## Task
1. We want to reproduce the method described in the attached paper.
2. The authors did not release any official code, so we have to plan our own implementation.
3. Before writing any Python code, please outline a comprehensive plan that covers:
   - Key details from the paper's **Methodology**.
   - Important aspects of **Experiments**, including dataset requirements, experimental settings, hyperparameters, or evaluation metrics.
4. The plan should be as **detailed and informative** as possible to help us write the final code later.

## Requirements
- You don't need to provide the actual code yet; focus on a **thorough, clear strategy**.
- If something is unclear from the paper, mention it explicitly.

## Instruction
The response should give us a strong roadmap, making it easier to write the code later."""}]

file_list_msg = [
    {"role": "user", "content": """Your goal is to create a concise, usable, and complete software system design for reproducing the paper's method. Use appropriate open-source libraries and keep the overall architecture simple.

Based on the plan for reproducing the paper’s main method, please design a concise, usable, and complete software system.
Keep the architecture simple and make effective use of open-source libraries.

-----

## Format Example
[CONTENT]
{
    "Implementation approach": "We will ...",
    "File list": [
        "main.py",
        "dataset_loader.py",
        "model.py",
        "trainer.py",
        "evaluation.py"
    ],
    "Data structures and interfaces": "\nclassDiagram\n class Main {\n +__init__()\n +run_experiment()\n }\n class DatasetLoader {\n +__init__(config: dict)\n +load_data() -> Any\n }\n class Model {\n +__init__(params: dict)\n +forward(x: Tensor) -> Tensor\n }\n class Trainer {\n +__init__(model: Model, data: Any)\n +train() -> None\n }\n class Evaluation {\n +__init__(model: Model, data: Any)\n +evaluate() -> dict\n }\n Main --> DatasetLoader\n Main --> Trainer\n Main --> Evaluation\n Trainer --> Model\n",
    "Program call flow": "\nsequenceDiagram\n participant M as Main\n participant DL as DatasetLoader\n participant MD as Model\n participant TR as Trainer\n participant EV as Evaluation\n M->>DL: load_data()\n DL-->>M: return dataset\n M->>MD: initialize model()\n M->>TR: train(model, dataset)\n TR->>MD: forward(x)\n MD-->>TR: predictions\n TR-->>M: training complete\n M->>EV: evaluate(model, dataset)\n EV->>MD: forward(x)\n MD-->>EV: predictions\n EV-->>M: metrics\n",
    "Anything UNCLEAR": "Need clarification on the exact dataset format and any specialized hyperparameters."
}
[/CONTENT]

## Nodes: "<node>: <type> # <instruction>"
- Implementation approach: <class 'str'> # Summarize the chosen solution strategy.
- File list: typing.List[str] # Only need relative paths. ALWAYS write a main.py or app.py here.
- Data structures and interfaces: typing.Optional[str] # Use mermaid classDiagram code syntax, including classes, method(__init__ etc.) and functions with type annotations, CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.
- Program call flow: typing.Optional[str] # Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.
- Anything UNCLEAR: <class 'str'> # Mention ambiguities and ask for clarifications.

## Constraint
Format: output wrapped inside [CONTENT][/CONTENT] like the format example, nothing else.

## Action
Follow the instructions for the nodes, generate the output, and ensure it follows the format example."""}
]

task_list_msg = [
    {'role': 'user', 'content': """Your goal is to break down tasks according to the PRD/technical design, generate a task list, and analyze task dependencies.
You will break down tasks and analyze dependencies.

You have outlined a clear PRD/technical design for reproducing the paper’s method and experiments.

Now, let's break down tasks according to the PRD/technical design, generate a task list, and analyze task dependencies.
The Logic Analysis should not only consider the dependencies between files but also provide detailed descriptions to assist in writing the code needed to reproduce the paper.

-----

## Format Example
[CONTENT]
{
    "Required packages": [
        "numpy==1.21.0",
        "torch==1.9.0"
    ],
    "Required Other language third-party packages": [
        "No third-party dependencies required"
    ],
    "Logic Analysis": [
        [
            "data_preprocessing.py",
            "DataPreprocessing class ........"
        ],
        [
            "trainer.py",
            "Trainer ....... "
        ],
        [
            "dataset_loader.py",
            "Handles loading and ........"
        ],
        [
            "model.py",
            "Defines the model ......."
        ],
        [
            "evaluation.py",
            "Evaluation class ........ "
        ],
        [
            "main.py",
            "Entry point ......."
        ]
    ],
    "Task list": [
        "dataset_loader.py",
        "model.py",
        "trainer.py",
        "evaluation.py",
        "main.py"
    ],
    "Full API spec": "openapi: 3.0.0 ...",
    "Shared Knowledge": "Both data_preprocessing.py and trainer.py share ........",
    "Anything UNCLEAR": "Clarification needed on recommended hardware configuration for large-scale experiments."
}

[/CONTENT]

## Nodes: "<node>: <type> # <instruction>"
- Required packages: typing.Optional[typing.List[str]] # Provide required third-party packages in requirements.txt format (e.g., 'numpy==1.21.0').
- Required Other language third-party packages: typing.List[str] # List down packages required for non-Python languages. If none, specify "No third-party dependencies required".
- Logic Analysis: typing.List[typing.List[str]] # Provide a list of files with the classes/methods/functions to be implemented, including dependency analysis and imports. Include as much detailed description as possible.
- Task list: typing.List[str] # Break down the tasks into a list of filenames, prioritized based on dependency order. The task list must include the previously generated file list.
- Full API spec: <class 'str'> # Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend. If front-end and back-end communication is not required, leave it blank.
- Shared Knowledge: <class 'str'> # Detail any shared knowledge, like common utility functions or configuration variables.
- Anything UNCLEAR: <class 'str'> # Mention any unresolved questions or clarifications needed from the paper or project scope.

## Constraint
Format: output wrapped inside [CONTENT][/CONTENT] like the format example, nothing else.

## Action
Follow the node instructions above, generate your output accordingly, and ensure it follows the given format example."""}]

# config
config_msg = [
    {'role': 'user', 'content': """You write elegant, modular, and maintainable code. Adhere to Google-style guidelines.

Based on the paper, plan, and design specified previously, follow the "Format Example" and generate the code.
Extract the training details from the above paper (e.g., learning rate, batch size, epochs, etc.), follow the "Format Example" and generate the code.
DO NOT FABRICATE DETAILS — only use what the paper provides.

You must write `config.yaml`.

ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Your output format must follow the example below exactly.

-----

# Format Example
## Code: config.yaml
```yaml
## config.yaml
training:
  learning_rate: ...
  batch_size: ...
  epochs: ...
...
```

-----

## Code: config.yaml
"""}]

def api_call(msg, model_name):
    """Make API call using the configured provider"""
    # Special handling for o3-mini reasoning effort
    if "o3-mini" in model_name and provider_name == 'openai':
        completion = llm_provider.create_completion(
            messages=msg,
            model=model_name,
            reasoning_effort="high"
        )
    else:
        completion = llm_provider.create_completion(
            messages=msg,
            model=model_name
        )

    return completion

responses = []
trajectories = []
total_accumulated_cost = 0

# Initialize rate limiter to avoid hitting TPM limits
rate_limiter = RateLimiter(max_tokens_per_minute=95000)  # 95K with 5K buffer
print("🛡️ Rate limiter initialized (95K TPM limit)")

for idx, instruction_msg in enumerate([plan_msg, file_list_msg, task_list_msg, config_msg]):
    current_stage = ""
    if idx == 0:
        current_stage = "[Planning] Overall plan"
    elif idx == 1:
        current_stage = "[Planning] Architecture design"
    elif idx == 2:
        current_stage = "[Planning] Logic design"
    elif idx == 3:
        current_stage = "[Planning] Configuration file generation"
    print(current_stage)

    trajectories.extend(instruction_msg)

    # Estimate tokens for this request and wait if needed
    estimated_tokens = estimate_tokens(str(trajectories))
    rate_limiter.wait_if_needed(estimated_tokens)

    completion = api_call(trajectories, model)

    # Extract response text using provider abstraction
    response_text = llm_provider.get_response_text(completion)
    usage_info = llm_provider.get_usage_info(completion)

    # Create completion JSON for logging (compatible format)
    completion_json = {
        'choices': [{'message': {'role': 'assistant', 'content': response_text}}],
        'usage': usage_info,
        'model': model
    }

    # print and logging
    print_response(completion_json)
    total_accumulated_cost = print_log_cost(completion_json, model, current_stage, output_dir, total_accumulated_cost)

    responses.append(completion_json)

    # trajectories
    message = {'role': 'assistant', 'content': response_text}
    trajectories.append(message)


# save
save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost)

# Print rate limiter statistics
rate_limiter.print_stats()

os.makedirs(output_dir, exist_ok=True)

with open(f'{output_dir}/planning_response.json', 'w') as f:
    json.dump(responses, f)

with open(f'{output_dir}/planning_trajectories.json', 'w') as f:
    json.dump(trajectories, f)

# Export planning as markdown for easy reference
print("\n📝 Exporting planning to markdown...")
with open(f'{output_dir}/planning_output.md', 'w', encoding='utf-8') as f:
    f.write(f"# Planning Output for {content_name}\n\n")
    f.write(f"**Model:** {model}\n")
    f.write(f"**Provider:** {provider_name}\n")
    f.write(f"**Content Type:** {content_type}\n\n")
    f.write("---\n\n")

    for idx, response in enumerate(responses):
        stage_names = ["Overall Plan", "Architecture Design", "Logic Design", "Configuration"]
        stage_name = stage_names[idx] if idx < len(stage_names) else f"Stage {idx+1}"

        f.write(f"## {stage_name}\n\n")
        content = response['choices'][0]['message']['content']
        f.write(content)
        f.write("\n\n---\n\n")

print(f"✅ Planning saved to: {output_dir}/planning_output.md")
+
codes/2_analyzing.py ADDED
import json
import os
from tqdm import tqdm
import sys
from utils import extract_planning, content_to_json, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost
from llm_provider import get_provider, get_default_model
import copy

import argparse

parser = argparse.ArgumentParser()

parser.add_argument('--paper_name', type=str)
parser.add_argument('--gpt_version', type=str, help='Model version (deprecated, use --model)')
parser.add_argument('--model', type=str, help='Model name')
parser.add_argument('--provider', type=str, default='gemini', choices=['openai', 'gemini', 'gemma'], help='LLM provider')
parser.add_argument('--paper_format', type=str, default="JSON", choices=["JSON", "LaTeX"])
parser.add_argument('--pdf_json_path', type=str)
parser.add_argument('--pdf_latex_path', type=str)
parser.add_argument('--output_dir', type=str, default="")

args = parser.parse_args()

# Initialize LLM provider
provider_name = args.provider
llm_provider = get_provider(provider_name)
model = args.model or args.gpt_version or get_default_model(provider_name)

print(f"🤖 Using {provider_name.upper()} with model: {model}")

paper_name = args.paper_name
gpt_version = args.gpt_version  # kept for backward compatibility (e.g., print_log_cost)
paper_format = args.paper_format
pdf_json_path = args.pdf_json_path
pdf_latex_path = args.pdf_latex_path
output_dir = args.output_dir

if paper_format == "JSON":
    with open(f'{pdf_json_path}') as f:
        paper_content = json.load(f)
elif paper_format == "LaTeX":
    with open(f'{pdf_latex_path}') as f:
        paper_content = f.read()
else:
    print("[ERROR] Invalid paper format. Please select either 'JSON' or 'LaTeX'.")
    sys.exit(1)


with open(f'{output_dir}/planning_config.yaml') as f:
    config_yaml = f.read()

context_lst = extract_planning(f'{output_dir}/planning_trajectories.json')

# 0: overview, 1: detailed, 2: PRD
if os.path.exists(f'{output_dir}/task_list.json'):
    with open(f'{output_dir}/task_list.json') as f:
        task_list = json.load(f)
else:
    task_list = content_to_json(context_lst[2])

if 'Task list' in task_list:
    todo_file_lst = task_list['Task list']
elif 'task_list' in task_list:
    todo_file_lst = task_list['task_list']
elif 'task list' in task_list:
    todo_file_lst = task_list['task list']
else:
    print("[ERROR] 'Task list' does not exist. Please re-generate the planning.")
    sys.exit(1)

if 'Logic Analysis' in task_list:
    logic_analysis = task_list['Logic Analysis']
elif 'logic_analysis' in task_list:
    logic_analysis = task_list['logic_analysis']
elif 'logic analysis' in task_list:
    logic_analysis = task_list['logic analysis']
else:
    print("[ERROR] 'Logic Analysis' does not exist. Please re-generate the planning.")
    sys.exit(1)

done_file_lst = ['config.yaml']
logic_analysis_dict = {}
for desc in logic_analysis:  # use the key-agnostic list resolved above, not task_list['Logic Analysis']
    logic_analysis_dict[desc[0]] = desc[1]

analysis_msg = [
    {"role": "system", "content": f"""You are an expert researcher, strategic analyzer and software engineer with a deep understanding of experimental design and reproducibility in scientific research.
You will receive a research paper in {paper_format} format, an overview of the plan, a design in JSON format consisting of "Implementation approach", "File list", "Data structures and interfaces", and "Program call flow", followed by a task in JSON format that includes "Required packages", "Required other language third-party packages", "Logic Analysis", and "Task list", along with a configuration file named "config.yaml".

Your task is to conduct a comprehensive logic analysis to accurately reproduce the experiments and methodologies described in the research paper.
This analysis must align precisely with the paper’s methodology, experimental setup, and evaluation criteria.

1. Align with the Paper: Your analysis must strictly follow the methods, datasets, model configurations, hyperparameters, and experimental setups described in the paper.
2. Be Clear and Structured: Present your analysis in a logical, well-organized, and actionable format that is easy to follow and implement.
3. Prioritize Efficiency: Optimize the analysis for clarity and practical implementation while ensuring fidelity to the original experiments.
4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
5. REFER TO CONFIGURATION: Always reference settings from the config.yaml file. Do not invent or assume any values—only use configurations explicitly provided.
"""}]

def get_write_msg(todo_file_name, todo_file_desc):

    draft_desc = f"Write the logic analysis in '{todo_file_name}', which is intended for '{todo_file_desc}'."
    if len(todo_file_desc.strip()) == 0:
        draft_desc = f"Write the logic analysis in '{todo_file_name}'."

    write_msg = [{'role': 'user', "content": f"""## Paper
{paper_content}

-----

## Overview of the plan
{context_lst[0]}

-----

## Design
{context_lst[1]}

-----

## Task
{context_lst[2]}

-----

## Configuration file
```yaml
{config_yaml}
```
-----

## Instruction
Conduct a Logic Analysis to assist in writing the code, based on the paper, the plan, the design, the task and the previously specified configuration file (config.yaml).
You DON'T need to provide the actual code yet; focus on a thorough, clear analysis.

{draft_desc}

-----

## Logic Analysis: {todo_file_name}"""}]
    return write_msg


def api_call(msg):
    """Make API call using the configured provider"""
    if "o3-mini" in model and provider_name == 'openai':
        completion = llm_provider.create_completion(
            messages=msg,
            model=model,
            reasoning_effort="high"
        )
    else:
        completion = llm_provider.create_completion(
            messages=msg,
            model=model
        )
    return completion


artifact_output_dir = f'{output_dir}/analyzing_artifacts'
os.makedirs(artifact_output_dir, exist_ok=True)

total_accumulated_cost = load_accumulated_cost(f"{output_dir}/accumulated_cost.json")
for todo_file_name in tqdm(todo_file_lst):
    responses = []
    trajectories = copy.deepcopy(analysis_msg)

    current_stage = f"[ANALYSIS] {todo_file_name}"
    print(current_stage)
    if todo_file_name == "config.yaml":
        continue

    if todo_file_name not in logic_analysis_dict:
        # No per-file description exists in the Logic Analysis; proceed with an empty one.
        logic_analysis_dict[todo_file_name] = ""

    instruction_msg = get_write_msg(todo_file_name, logic_analysis_dict[todo_file_name])
    trajectories.extend(instruction_msg)

    # Route through api_call so o3-mini reasoning-effort handling applies here too
    completion = api_call(trajectories)

    # Extract response using provider abstraction
    response_text = llm_provider.get_response_text(completion)
    usage_info = llm_provider.get_usage_info(completion)

    # Create completion JSON for logging
    completion_json = {
        'choices': [{'message': {'role': 'assistant', 'content': response_text}}],
        'usage': usage_info,
        'model': model
    }

    # print and logging
    print_response(completion_json)
    total_accumulated_cost = print_log_cost(completion_json, model, current_stage, output_dir, total_accumulated_cost)

    responses.append(completion_json)

    # trajectories
    message = {'role': 'assistant', 'content': response_text}
    trajectories.append(message)


    # save - create subdirectories if needed
    artifact_file_path = f'{artifact_output_dir}/{todo_file_name}_simple_analysis.txt'
    artifact_file_dir = os.path.dirname(artifact_file_path)
    os.makedirs(artifact_file_dir, exist_ok=True)

    with open(artifact_file_path, 'w') as f:
        f.write(completion_json['choices'][0]['message']['content'])


    done_file_lst.append(todo_file_name)

    # save for the next stage (coding)
    todo_file_name = todo_file_name.replace("/", "_")
    with open(f'{output_dir}/{todo_file_name}_simple_analysis_response.json', 'w') as f:
        json.dump(responses, f)

    with open(f'{output_dir}/{todo_file_name}_simple_analysis_trajectories.json', 'w') as f:
        json.dump(trajectories, f)

save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost)
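The triple key fallbacks above could be factored into one helper; a sketch (`first_present` is not in the repo):

def first_present(d: dict, keys: list):
    """Return the value for the first key present in d, else None."""
    for key in keys:
        if key in d:
            return d[key]
    return None

# Usage sketch:
# todo_file_lst = first_present(task_list, ['Task list', 'task_list', 'task list'])
# logic_analysis = first_present(task_list, ['Logic Analysis', 'logic_analysis', 'logic analysis'])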
codes/3_coding.py ADDED
import json
import os
from tqdm import tqdm
import re
import sys
import copy
from utils import extract_planning, content_to_json, extract_code_from_content, print_response, print_log_cost, load_accumulated_cost, save_accumulated_cost
from llm_provider import get_provider, get_default_model
import argparse

parser = argparse.ArgumentParser()

parser.add_argument('--paper_name', type=str)
parser.add_argument('--gpt_version', type=str, default="o3-mini", help='Model version (deprecated, use --model)')
parser.add_argument('--model', type=str, help='Model name')
parser.add_argument('--provider', type=str, default='gemini', choices=['openai', 'gemini', 'gemma'], help='LLM provider')
parser.add_argument('--paper_format', type=str, default="JSON", choices=["JSON", "LaTeX"])
parser.add_argument('--pdf_json_path', type=str)   # JSON format
parser.add_argument('--pdf_latex_path', type=str)  # LaTeX format
parser.add_argument('--output_dir', type=str, default="")
parser.add_argument('--output_repo_dir', type=str, default="")

args = parser.parse_args()

# Initialize LLM provider
provider_name = args.provider
llm_provider = get_provider(provider_name)
model = args.model or args.gpt_version or get_default_model(provider_name)

print(f"🤖 Using {provider_name.upper()} with model: {model}")

paper_name = args.paper_name
gpt_version = args.gpt_version
paper_format = args.paper_format
pdf_json_path = args.pdf_json_path
pdf_latex_path = args.pdf_latex_path
output_dir = args.output_dir
output_repo_dir = args.output_repo_dir

if paper_format == "JSON":
    with open(f'{pdf_json_path}') as f:
        paper_content = json.load(f)
elif paper_format == "LaTeX":
    with open(f'{pdf_latex_path}') as f:
        paper_content = f.read()
else:
    print("[ERROR] Invalid paper format. Please select either 'JSON' or 'LaTeX'.")
    sys.exit(1)

with open(f'{output_dir}/planning_config.yaml') as f:
    config_yaml = f.read()

context_lst = extract_planning(f'{output_dir}/planning_trajectories.json')
# 0: overview, 1: detailed, 2: PRD
# file_list = content_to_json(context_lst[1])
task_list = content_to_json(context_lst[2])

todo_file_lst = task_list['Task list']
done_file_lst = ['config.yaml']
done_file_dict = {}

code_msg = [
    {"role": "system", "content": f"""You are an expert researcher and software engineer with a deep understanding of experimental design and reproducibility in scientific research.
You will receive a research paper in {paper_format} format, an overview of the plan, a Design in JSON format consisting of "Implementation approach", "File list", "Data structures and interfaces", and "Program call flow", followed by a Task in JSON format that includes "Required packages", "Required other language third-party packages", "Logic Analysis", and "Task list", along with a configuration file named "config.yaml".
Your task is to write code to reproduce the experiments and methodologies described in the paper.

The code you write must be elegant, modular, and maintainable, adhering to Google-style guidelines.
The code must strictly align with the paper's methodology, experimental setup, and evaluation metrics.
Write code with triple quotes."""}]

def get_write_msg(todo_file_name, detailed_logic_analysis, done_file_lst):
    code_files = ""
    for done_file in done_file_lst:
        if done_file.endswith(".yaml"):
            continue
        code_files += f"""
```python
{done_file_dict[done_file]}
```

"""

    write_msg = [
        {'role': 'user', "content": f"""# Context
## Paper
{paper_content}

-----

## Overview of the plan
{context_lst[0]}

-----

## Design
{context_lst[1]}

-----

## Task
{context_lst[2]}

-----

## Configuration file
```yaml
{config_yaml}
```
-----

## Code Files
{code_files}

-----

# Format example
## Code: {todo_file_name}
```python
## {todo_file_name}
...
```

-----

# Instruction
Based on the paper, plan, design, task and configuration file (config.yaml) specified previously, follow the "Format example" and write the code.

We have {done_file_lst}.
Next, you must write only the "{todo_file_name}".
1. Only One file: do your best to implement THIS ONLY ONE FILE.
2. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.
3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. AVOID circular import.
4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
5. CAREFULLY CHECK THAT YOU DON'T MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.
6. Before using an external variable/module, make sure you import it first.
7. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
8. REFER TO CONFIGURATION: you must use configuration from "config.yaml". DO NOT FABRICATE any configuration values.

{detailed_logic_analysis}

## Code: {todo_file_name}"""}]
    return write_msg


def api_call(msg):
    """Make API call using the configured provider"""
    if "o3-mini" in model and provider_name == 'openai':
        completion = llm_provider.create_completion(
            messages=msg,
            model=model,
            reasoning_effort="high"
        )
    else:
        completion = llm_provider.create_completion(
            messages=msg,
            model=model
        )
    return completion


# Load the per-file logic analyses produced by the analyzing stage
detailed_logic_analysis_dict = {}
retrieved_section_dict = {}
for todo_file_name in todo_file_lst:
    # simple analysis
    save_todo_file_name = todo_file_name.replace("/", "_")

    if todo_file_name == "config.yaml":
        continue

    with open(f"{output_dir}/{save_todo_file_name}_simple_analysis_response.json") as f:
        detailed_logic_analysis_response = json.load(f)
    detailed_logic_analysis_dict[todo_file_name] = detailed_logic_analysis_response[0]['choices'][0]['message']['content']

artifact_output_dir = f'{output_dir}/coding_artifacts'
os.makedirs(artifact_output_dir, exist_ok=True)

total_accumulated_cost = load_accumulated_cost(f"{output_dir}/accumulated_cost.json")
for todo_idx, todo_file_name in enumerate(tqdm(todo_file_lst)):
    responses = []
    trajectories = copy.deepcopy(code_msg)

    current_stage = f"[CODING] {todo_file_name}"
    print(current_stage)

    if todo_file_name == "config.yaml":
        continue

    instruction_msg = get_write_msg(todo_file_name, detailed_logic_analysis_dict[todo_file_name], done_file_lst)
    trajectories.extend(instruction_msg)

    completion = api_call(trajectories)

    # Extract response using provider abstraction
    response_text = llm_provider.get_response_text(completion)
    usage_info = llm_provider.get_usage_info(completion)

    # Create completion JSON for logging
    completion_json = {
        'choices': [{'message': {'role': 'assistant', 'content': response_text}}],
        'usage': usage_info,
        'model': model
    }

    # print and logging
    print_response(completion_json)
    total_accumulated_cost = print_log_cost(completion_json, model, current_stage, output_dir, total_accumulated_cost)

    responses.append(completion_json)

    # trajectories
    message = {'role': 'assistant', 'content': response_text}
    trajectories.append(message)

    done_file_lst.append(todo_file_name)

    # save
    # save_dir_name = f"{paper_name}_repo"
    os.makedirs(f'{output_repo_dir}', exist_ok=True)
    save_todo_file_name = todo_file_name.replace("/", "_")


    # save artifacts - create subdirectories if needed
    artifact_file_path = f'{artifact_output_dir}/{save_todo_file_name}_coding.txt'
    artifact_file_dir = os.path.dirname(artifact_file_path)
    os.makedirs(artifact_file_dir, exist_ok=True)

    with open(artifact_file_path, 'w') as f:
        f.write(completion_json['choices'][0]['message']['content'])

    # extract code and save
    code = extract_code_from_content(completion_json['choices'][0]['message']['content'])
    if len(code) == 0:
        code = completion_json['choices'][0]['message']['content']

    done_file_dict[todo_file_name] = code
    if save_todo_file_name != todo_file_name:
        todo_file_dir = '/'.join(todo_file_name.split("/")[:-1])
        os.makedirs(f"{output_repo_dir}/{todo_file_dir}", exist_ok=True)

    # save code file - create subdirectories if needed
    code_file_path = f"{output_repo_dir}/{todo_file_name}"
    code_file_dir = os.path.dirname(code_file_path)
    os.makedirs(code_file_dir, exist_ok=True)

    with open(code_file_path, 'w') as f:
        f.write(code)

save_accumulated_cost(f"{output_dir}/accumulated_cost.json", total_accumulated_cost)
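`extract_code_from_content` is imported from `utils`, which is not shown in this commit; judging by its use above (it returns an empty string when no code is found), it plausibly pulls the body of the first fenced code block, e.g.:

import re

def extract_code_from_content(content: str) -> str:
    """Hypothetical sketch: return the body of the first fenced code block, or ''."""
    match = re.search(r"```(?:\w+)?\n(.*?)```", content, re.DOTALL)
    return match.group(1) if match else ""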
codes/eval.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import json
3
+ import os
4
+ import sys
5
+ import argparse
6
+ from utils import read_python_files, extract_planning, content_to_json, \
7
+ num_tokens_from_messages, read_all_files, extract_json_from_string, get_now_str, print_log_cost
8
+
9
+ client = OpenAI(api_key = os.environ["OPENAI_API_KEY"])
10
+
11
+ def api_call(request_json):
12
+ completion = client.chat.completions.create(**request_json)
13
+ return completion
14
+
15
+ def main(args):
16
+
17
+ paper_name = args.paper_name
18
+ pdf_json_path = args.pdf_json_path
19
+ output_dir = args.output_dir
20
+ target_repo_dir = args.target_repo_dir
21
+ eval_result_dir = args.eval_result_dir
22
+ gpt_version = args.gpt_version
23
+ generated_n = args.generated_n
24
+ data_dir = args.data_dir
25
+ eval_type = args.eval_type
26
+ is_papercoder = True if args.papercoder else False
27
+
28
+ gold_repo_dir = args.gold_repo_dir
29
+
30
+ # paper
31
+ with open(f'{pdf_json_path}') as f:
32
+ paper_json = json.load(f)
33
+
34
+ codes = ""
35
+ if is_papercoder:
36
+ # python files
37
+ target_files_dict = read_python_files(target_repo_dir)
38
+
39
+ # configuration
40
+ with open(f'{output_dir}/planning_config.yaml') as f:
41
+ config_yaml = f.read()
42
+
43
+ context_lst = extract_planning(f'{output_dir}/planning_trajectories.json')
44
+
45
+ if os.path.exists(f'{output_dir}/task_list.json'):
46
+ with open(f'{output_dir}/task_list.json') as f:
47
+ task_list = json.load(f)
48
+ else:
49
+ task_list = content_to_json(context_lst[2])
50
+
51
+ todo_file_lst = task_list['Task list']
52
+ for todo_file in todo_file_lst:
53
+ if todo_file.endswith(".yaml"):
54
+ continue
55
+ codes += f"```python\n## File name: {todo_file}\n{target_files_dict[todo_file]}\n```\n\n"
56
+
57
+ codes += f"```yaml\n## File name: config.yaml\n{config_yaml}\n```\n\n"
58
+ else:
59
+ target_files_dict = read_all_files(target_repo_dir, allowed_ext=[".py", ".yaml", ".yml", ".md", ".sh", ".bash"], is_print=False)
60
+ for file_name, code in target_files_dict.items():
61
+ codes += f"```## File name: {file_name}\n{code}\n```\n\n"
62
+
63
+
64
+ prompt = open(f"{data_dir}/prompts/{eval_type}.txt").read()
65
+
66
+ cur_prompt = prompt.replace('{{Paper}}', f"{paper_json}").replace('{{Code}}', codes)
67
+
68
+ # refernce-based
69
+ if "ref_based" == eval_type and len(gold_repo_dir) > 0:
70
+ all_files_dict = read_all_files(gold_repo_dir, allowed_ext=[".py", ".yaml", ".yml", ".md", ".sh", ".bash"], is_print=False)
71
+
72
+ goldcodes = ""
73
+ gold_cnt = 0
74
+ if len(args.selected_file_path) > 0:
75
+ selected_file_lst = []
76
+ with open(args.selected_file_path) as f:
77
+ selected_file_lst = f.readlines()
78
+
79
+ for s_idx in range(len(selected_file_lst)):
80
+ selected_file_lst[s_idx] = selected_file_lst[s_idx].strip()
81
+
82
+
83
+ for all_file, all_file_code in all_files_dict.items():
84
+ if all_file not in selected_file_lst:
85
+ continue
86
+
87
+ goldcodes += f"```## File name: {all_file}\n{all_file_code}\n```\n\n"
88
+
89
+ gold_cnt += 1
90
+
91
+
92
+ else:
93
+ for all_file, all_file_code in all_files_dict.items():
94
+ goldcodes += f"```## File name: {all_file}\n{all_file_code}\n```\n\n"
95
+
96
+ gold_cnt += 1
97
+
98
+ cur_prompt = cur_prompt.replace('{{GoldCode}}', f"{goldcodes}")
99
+
100
+ msg = [{"role": "system", "content": cur_prompt}]
101
+
102
+ try:
103
+ num_tokens = num_tokens_from_messages(msg)
104
+ except Exception as e:
105
+ print(f"[WARNING] An exception was raised while counting tokens for the target repository of {args.paper_name}.")
106
+ print(e)
107
+ print("-"*40)
108
+ num_tokens = 0
109
+
110
+
111
+ if num_tokens > 128000:
112
+ print(f"[ERROR] {args.paper_name} more than 128k")
113
+ sys.exit(0)
114
+
115
+
116
+ if "o3-mini" in gpt_version:
117
+ if generated_n > 8:
118
+ print(f"[WARNING] o3-mini does not support n > 8. Setting generated_n to 8.")
119
+ generated_n = 8
120
+
121
+ request_json = {
122
+ "model": gpt_version,
123
+ "messages": msg,
124
+ "reasoning_effort": "high",
125
+ "n": generated_n
126
+ }
127
+ else:
128
+ request_json = {
129
+ "model": gpt_version,
130
+ "messages": msg,
131
+ "temperature": 1,
132
+ "frequency_penalty": 0,
133
+ "presence_penalty": 0,
134
+ "stop": None,
135
+ "n": generated_n # 10
136
+ }
137
+
138
+ completion = api_call(request_json)
139
+ completion_json = json.loads(completion.model_dump_json())
140
+
141
+ score_key = "score"
142
+ rationale_key = "critique_list"
143
+
144
+
145
+ all_scores = []
146
+ rationales = []
147
+ for n in range(generated_n):
148
+ choice = completion_json['choices'][n]
149
+
150
+ output = choice['message']['content'].strip()
151
+
152
+ try:
153
+ output_json2 = json.loads(output)
154
+ score = int(output_json2[score_key])
155
+
156
+ if isinstance(output_json2[rationale_key], str):
157
+ rationale = output_json2[rationale_key]
158
+ else:
159
+ rationale = json.dumps(output_json2[rationale_key])
160
+ except Exception as e:
161
+ # print(e)
162
+ try:
163
+ output_json2 = json.loads(extract_json_from_string(output))
164
+ score = int(output_json2[score_key])
165
+
166
+ if isinstance(output_json2[rationale_key], str):
167
+ rationale = output_json2[rationale_key]
168
+ else:
169
+ rationale = json.dumps(output_json2[rationale_key])
170
+ except Exception as e2: # Parsing Error
171
+ print(f"[WARNING] Invalid repsponse: parsing error")
172
+ print(e2)
173
+ print("-"*40)
174
+
175
+ continue
176
+
177
+ # score
178
+ if score < 1 or score > 5:
179
+ print(f"[WARNING] Invalid repsponse: score {score}, Score must be in the range of 1–5.")
180
+ continue
181
+
182
+ all_scores.append(int(score))
183
+ rationales.append(rationale)
184
+
185
+
186
+ avg_score = sum(all_scores) / len(all_scores)
187
+
188
+ output_json= {
189
+ "paper_name": paper_name,
190
+ "target_repo_dir": target_repo_dir,
191
+ "eval_type": eval_type,
192
+ "gold_repo_dir": gold_repo_dir,
193
+ "generated_n": generated_n,
194
+ "request_json": request_json,
195
+ "completion_json": completion_json,
196
+ "eval_result": {
197
+ "score": avg_score,
198
+ "valid_n": len(all_scores),
199
+ "scroe_lst": all_scores,
200
+ "rationale_lst": rationales,
201
+ },
202
+ }
203
+
204
+ now_str = get_now_str()
205
+ os.makedirs(eval_result_dir, exist_ok=True)
206
+ with open(f"{eval_result_dir}/{paper_name}_eval_{eval_type}_{gpt_version}_{now_str}.json", 'w', encoding='utf-8') as f:
207
+ json.dump(output_json, f)
208
+
209
+
210
+ # ---------------
211
+ print()
212
+ print("=" * 40)
213
+ print("🌟 Evaluation Summary 🌟")
214
+ print(f"📄 Paper name: {paper_name}")
215
+ print(f"🧪 Evaluation type: {eval_type}")
216
+ print(f"📁 Target repo directory: {target_repo_dir}")
217
+ print(f"📊 Evaluation result:")
218
+ print(f"\t📈 Score: {avg_score:.4f}")
219
+ print(f"\t✅ Valid: {output_json['eval_result']['valid_n']}/{generated_n}")
220
+ print("=" * 40)
221
+
222
+ print_log_cost(completion_json, gpt_version, f"[Evaluation] {paper_name} - {eval_type}", output_dir, 0)
223
+ # ---------------
224
+
225
+
226
+ if __name__ == '__main__':
227
+
228
+ argparser = argparse.ArgumentParser()
229
+
230
+ argparser.add_argument('--paper_name', type=str)
231
+ argparser.add_argument('--pdf_json_path', type=str)
232
+ argparser.add_argument('--data_dir',type=str, default="../data")
233
+
234
+ argparser.add_argument('--output_dir',type=str)
235
+
236
+ argparser.add_argument('--target_repo_dir', type=str)
237
+ argparser.add_argument('--gold_repo_dir', type=str, default="")
238
+ argparser.add_argument('--eval_result_dir',type=str)
239
+
240
+ argparser.add_argument('--eval_type', type=str, default="ref_free", choices=["ref_free", "ref_based"])
241
+
242
+ argparser.add_argument('--generated_n', type=int, default=8)
243
+ argparser.add_argument('--gpt_version', type=str, default="o3-mini")
244
+
245
+ argparser.add_argument('--selected_file_path', type=str, default="")
246
+ argparser.add_argument('--papercoder', action="store_true")
247
+
248
+
249
+
250
+ args = argparser.parse_args()
251
+ main(args)
252
+
253
+
254
+ # ref-free
255
+ # python eval.py \
256
+ # --paper_name Transformer \
257
+ # --pdf_json_path ../examples/Transformer_cleaned.json \
258
+ # --data_dir ../data \
259
+ # --output_dir ../outputs/Transformer \
260
+ # --target_repo_dir ../outputs/Transformer_repo \
261
+ # --eval_result_dir ../results \
262
+ # --eval_type ref_free \
263
+ # --generated_n 8 \
264
+ # --papercoder
265
+
266
+ # ref-based
267
+ # python eval.py \
268
+ # --paper_name Transformer \
269
+ # --pdf_json_path ../examples/Transformer_cleaned.json \
270
+ # --data_dir ../data \
271
+ # --output_dir ../outputs/Transformer \
272
+ # --target_repo_dir ../outputs/Transformer_repo \
273
+ # --gold_repo_dir ../examples/Transformer_gold_repo \
274
+ # --eval_result_dir ../results \
275
+ # --eval_type ref_based \
276
+ # --generated_n 8 \
277
+ # --papercoder
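For orientation, the `eval_result` block this script writes has the following shape; a small hedged sketch with every value invented for illustration:

```python
# Illustrative shape of the eval_result payload written above
# (all values here are invented for illustration).
example_eval_result = {
    "score": 4.25,                          # mean over the valid samples
    "valid_n": 8,                           # responses that parsed and scored within 1-5
    "score_lst": [4, 5, 4, 4, 5, 4, 4, 4],  # one entry per valid sample
    "rationale_lst": ["rationale for sample 1", "rationale for sample 2"],
}
```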
codes/example_use_gemma.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ Example: How to use Gemma provider in Blog2Code pipeline
3
+
4
+ This demonstrates how to modify any of the pipeline scripts to use Gemma instead of OpenAI/Gemini.
5
+ """
6
+ import os
7
+
8
+ # Set the NVIDIA API key
9
+ os.environ.setdefault('NVIDIA_API_KEY', '<your-nvidia-api-key>')  # use your own key; never commit real credentials
10
+
11
+ # When running any pipeline script, simply change the provider_name to 'gemma'
12
+ # For example, in 1_planning.py, 2_analyzing.py, or 3_coding.py:
13
+
14
+ # OLD (using OpenAI):
15
+ # provider_name = 'openai'
16
+
17
+ # NEW (using Gemma):
18
+ provider_name = 'gemma'
19
+
20
+ # The rest of the code remains the same!
21
+ # The scripts will automatically use the Gemma model with default settings.
22
+
23
+ print(f"✅ Pipeline configured to use: {provider_name}")
24
+ print("You can now run any of the pipeline scripts:")
25
+ print(" - python 1_planning.py")
26
+ print(" - python 2_analyzing.py")
27
+ print(" - python 3_coding.py")
codes/llm_provider.py ADDED
@@ -0,0 +1,342 @@
1
+ """
2
+ LLM Provider abstraction layer for Blog2Code.
3
+ Supports multiple LLM providers: OpenAI, Google Gemini, NVIDIA Gemma
4
+ """
5
+ import os
6
+ from typing import Dict, List, Any, Optional
7
+ from abc import ABC, abstractmethod
8
+
9
+
10
+ class LLMProvider(ABC):
11
+ """Base class for LLM providers"""
12
+
13
+ @abstractmethod
14
+ def create_completion(self, messages: List[Dict], model: str, **kwargs) -> Any:
15
+ """Create a chat completion"""
16
+ pass
17
+
18
+ @abstractmethod
19
+ def get_response_text(self, completion: Any) -> str:
20
+ """Extract text from completion response"""
21
+ pass
22
+
23
+ @abstractmethod
24
+ def get_usage_info(self, completion: Any) -> Dict:
25
+ """Extract token usage information"""
26
+ pass
27
+
28
+ @abstractmethod
29
+ def calculate_cost(self, usage: Dict, model: str) -> float:
30
+ """Calculate cost based on usage"""
31
+ pass
32
+
33
+
34
+ class OpenAIProvider(LLMProvider):
35
+ """OpenAI API implementation"""
36
+
37
+ def __init__(self, api_key: Optional[str] = None):
38
+ from openai import OpenAI
39
+ self.client = OpenAI(api_key=api_key or os.environ.get("OPENAI_API_KEY"))
40
+
41
+ def create_completion(self, messages: List[Dict], model: str, **kwargs) -> Any:
42
+ """Create OpenAI chat completion"""
43
+ return self.client.chat.completions.create(
44
+ model=model,
45
+ messages=messages,
46
+ **kwargs
47
+ )
48
+
49
+ def get_response_text(self, completion: Any) -> str:
50
+ """Extract text from OpenAI response"""
51
+ return completion.choices[0].message.content
52
+
53
+ def get_usage_info(self, completion: Any) -> Dict:
54
+ """Extract usage from OpenAI response"""
55
+ return {
56
+ 'prompt_tokens': completion.usage.prompt_tokens,
57
+ 'completion_tokens': completion.usage.completion_tokens,
58
+ 'total_tokens': completion.usage.total_tokens,
59
+ 'cached_tokens': getattr(completion.usage.prompt_tokens_details, 'cached_tokens', 0) if hasattr(completion.usage, 'prompt_tokens_details') else 0
60
+ }
61
+
62
+ def calculate_cost(self, usage: Dict, model: str) -> float:
63
+ """Calculate OpenAI cost"""
64
+ # Pricing per 1M tokens
65
+ model_costs = {
66
+ "gpt-4o-mini": {"input": 0.150, "cached": 0.075, "output": 0.600},
67
+ "gpt-4o": {"input": 2.50, "cached": 1.25, "output": 10.00},
68
+ "gpt-3.5-turbo": {"input": 0.50, "cached": 0.25, "output": 1.50},
69
+ "o3-mini": {"input": 1.10, "cached": 0.55, "output": 4.40},
70
+ }
71
+
72
+ costs = model_costs.get(model, model_costs["gpt-4o-mini"])
73
+
74
+ prompt_tokens = usage['prompt_tokens']
75
+ cached_tokens = usage.get('cached_tokens', 0)
76
+ completion_tokens = usage['completion_tokens']
77
+
78
+ actual_input_tokens = prompt_tokens - cached_tokens
79
+
80
+ input_cost = (actual_input_tokens / 1_000_000) * costs["input"]
81
+ cached_cost = (cached_tokens / 1_000_000) * costs["cached"]
82
+ output_cost = (completion_tokens / 1_000_000) * costs["output"]
83
+
84
+ return input_cost + cached_cost + output_cost
85
+
86
+
87
+ class GeminiProvider(LLMProvider):
88
+ """Google Gemini API implementation"""
89
+
90
+ def __init__(self, api_key: Optional[str] = None):
91
+ try:
92
+ import google.generativeai as genai
93
+ self.genai = genai
94
+ genai.configure(api_key=api_key or os.environ.get("GEMINI_API_KEY"))
95
+ except ImportError:
96
+ raise ImportError(
97
+ "google-generativeai not installed. "
98
+ "Install with: pip install google-generativeai"
99
+ )
100
+
101
+ def create_completion(self, messages: List[Dict], model: str, **kwargs) -> Any:
102
+ """Create Gemini chat completion"""
103
+ # Convert OpenAI message format to Gemini format
104
+ gemini_messages = self._convert_messages(messages)
105
+
106
+ # Fix model name - Gemini expects models/model-name format
107
+ if not model.startswith('models/'):
108
+ model = f'models/{model}'
109
+
110
+ # Create model
111
+ gemini_model = self.genai.GenerativeModel(model)
112
+
113
+ # Generate response
114
+ response = gemini_model.generate_content(
115
+ gemini_messages,
116
+ generation_config=self._get_generation_config(**kwargs)
117
+ )
118
+
119
+ return response
120
+
121
+ def _convert_messages(self, messages: List[Dict]) -> str:
122
+ """Convert OpenAI messages to Gemini prompt format"""
123
+ # Gemini uses a simpler format - concatenate all messages
124
+ prompt_parts = []
125
+
126
+ for msg in messages:
127
+ role = msg['role']
128
+ content = msg['content']
129
+
130
+ if role == 'system':
131
+ prompt_parts.append(f"System Instructions:\n{content}\n")
132
+ elif role == 'user':
133
+ prompt_parts.append(f"User:\n{content}\n")
134
+ elif role == 'assistant':
135
+ prompt_parts.append(f"Assistant:\n{content}\n")
136
+
137
+ return "\n".join(prompt_parts)
138
+
139
+ def _get_generation_config(self, **kwargs):
140
+ """Convert OpenAI kwargs to Gemini generation config"""
141
+ config = {}
142
+
143
+ # Map common parameters
144
+ if 'temperature' in kwargs:
145
+ config['temperature'] = kwargs['temperature']
146
+ if 'max_tokens' in kwargs:
147
+ config['max_output_tokens'] = kwargs['max_tokens']
148
+ if 'top_p' in kwargs:
149
+ config['top_p'] = kwargs['top_p']
150
+
151
+ return config
152
+
153
+ def get_response_text(self, completion: Any) -> str:
154
+ """Extract text from Gemini response"""
155
+ return completion.text
156
+
157
+ def get_usage_info(self, completion: Any) -> Dict:
158
+ """Extract usage from Gemini response"""
159
+ # Gemini provides token counts in metadata
160
+ try:
161
+ metadata = completion.usage_metadata
162
+ return {
163
+ 'prompt_tokens': metadata.prompt_token_count,
164
+ 'completion_tokens': metadata.candidates_token_count,
165
+ 'total_tokens': metadata.total_token_count,
166
+ 'cached_tokens': getattr(metadata, 'cached_content_token_count', 0)
167
+ }
168
+ except Exception:
169
+ # Fallback if metadata not available
170
+ return {
171
+ 'prompt_tokens': 0,
172
+ 'completion_tokens': 0,
173
+ 'total_tokens': 0,
174
+ 'cached_tokens': 0
175
+ }
176
+
177
+ def calculate_cost(self, usage: Dict, model: str) -> float:
178
+ """Calculate Gemini cost"""
179
+ # Gemini pricing per 1M tokens (verify against Google's current published rates)
180
+ model_costs = {
181
+ "gemini-1.5-flash": {"input": 0.075, "cached": 0.01875, "output": 0.30},
182
+ "gemini-1.5-pro": {"input": 1.25, "cached": 0.3125, "output": 5.00},
183
+ "gemini-2.0-flash-exp": {"input": 0.0, "cached": 0.0, "output": 0.0}, # Free during preview
184
+ }
185
+
186
+ costs = model_costs.get(model, model_costs["gemini-1.5-flash"])
187
+
188
+ prompt_tokens = usage['prompt_tokens']
189
+ cached_tokens = usage.get('cached_tokens', 0)
190
+ completion_tokens = usage['completion_tokens']
191
+
192
+ actual_input_tokens = prompt_tokens - cached_tokens
193
+
194
+ input_cost = (actual_input_tokens / 1_000_000) * costs["input"]
195
+ cached_cost = (cached_tokens / 1_000_000) * costs["cached"]
196
+ output_cost = (completion_tokens / 1_000_000) * costs["output"]
197
+
198
+ return input_cost + cached_cost + output_cost
199
+
200
+
201
+ class GemmaProvider(LLMProvider):
202
+ """NVIDIA Gemma API implementation"""
203
+
204
+ def __init__(self, api_key: Optional[str] = None):
205
+ import requests
206
+ self.requests = requests
207
+ self.api_key = api_key or os.environ.get("NVIDIA_API_KEY")
208
+ if not self.api_key:
209
+ raise ValueError(
210
+ "NVIDIA_API_KEY not found. "
211
+ "Set it as an environment variable or pass it to the constructor."
212
+ )
213
+ self.invoke_url = "https://integrate.api.nvidia.com/v1/chat/completions"
214
+
215
+ def create_completion(self, messages: List[Dict], model: str, **kwargs) -> Any:
216
+ """Create Gemma chat completion"""
217
+ # Prepare headers
218
+ headers = {
219
+ "Authorization": f"Bearer {self.api_key}",
220
+ "Accept": "application/json" # Non-streaming for simplicity
221
+ }
222
+
223
+ # Prepare payload
224
+ payload = {
225
+ "model": model,
226
+ "messages": messages,
227
+ "max_tokens": kwargs.get('max_tokens', 512),
228
+ "temperature": kwargs.get('temperature', 0.20),
229
+ "top_p": kwargs.get('top_p', 0.70),
230
+ "stream": False # Disable streaming for now
231
+ }
232
+
233
+ # Make request
234
+ response = self.requests.post(self.invoke_url, headers=headers, json=payload)
235
+ response.raise_for_status()
236
+
237
+ return response.json()
238
+
239
+ def get_response_text(self, completion: Any) -> str:
240
+ """Extract text from Gemma response"""
241
+ # NVIDIA API returns OpenAI-compatible format
242
+ if isinstance(completion, dict):
243
+ return completion['choices'][0]['message']['content']
244
+ return str(completion)
245
+
246
+ def get_usage_info(self, completion: Any) -> Dict:
247
+ """Extract usage from Gemma response"""
248
+ try:
249
+ usage = completion.get('usage', {})
250
+ return {
251
+ 'prompt_tokens': usage.get('prompt_tokens', 0),
252
+ 'completion_tokens': usage.get('completion_tokens', 0),
253
+ 'total_tokens': usage.get('total_tokens', 0),
254
+ 'cached_tokens': 0 # NVIDIA API doesn't provide cached token info
255
+ }
256
+ except Exception:
257
+ return {
258
+ 'prompt_tokens': 0,
259
+ 'completion_tokens': 0,
260
+ 'total_tokens': 0,
261
+ 'cached_tokens': 0
262
+ }
263
+
264
+ def calculate_cost(self, usage: Dict, model: str) -> float:
265
+ """Calculate Gemma cost"""
266
+ # NVIDIA API pricing (check current pricing at build.nvidia.com)
267
+ # For now, using placeholder values - update with actual pricing
268
+ model_costs = {
269
+ "google/gemma-3-27b-it": {"input": 0.0, "output": 0.0}, # Free tier or update with actual costs
270
+ }
271
+
272
+ costs = model_costs.get(model, {"input": 0.0, "output": 0.0})
273
+
274
+ prompt_tokens = usage['prompt_tokens']
275
+ completion_tokens = usage['completion_tokens']
276
+
277
+ input_cost = (prompt_tokens / 1_000_000) * costs["input"]
278
+ output_cost = (completion_tokens / 1_000_000) * costs["output"]
279
+
280
+ return input_cost + output_cost
281
+
282
+
283
+ def get_provider(provider_name: str, api_key: Optional[str] = None) -> LLMProvider:
284
+ """
285
+ Factory function to get LLM provider.
286
+
287
+ Args:
288
+ provider_name: Name of provider ('openai', 'gemini', or 'gemma')
289
+ api_key: Optional API key (uses env var if not provided)
290
+
291
+ Returns:
292
+ LLMProvider instance
293
+ """
294
+ providers = {
295
+ 'openai': OpenAIProvider,
296
+ 'gemini': GeminiProvider,
297
+ 'gemma': GemmaProvider,
298
+ }
299
+
300
+ if provider_name not in providers:
301
+ raise ValueError(
302
+ f"Unknown provider: {provider_name}. "
303
+ f"Available providers: {list(providers.keys())}"
304
+ )
305
+
306
+ return providers[provider_name](api_key=api_key)
307
+
308
+
309
+ def get_default_model(provider_name: str) -> str:
310
+ """Get default model for a provider"""
311
+ defaults = {
312
+ 'openai': 'gpt-4o-mini',
313
+ 'gemini': 'gemini-1.5-flash',
314
+ 'gemma': 'google/gemma-3-27b-it',
315
+ }
316
+ return defaults.get(provider_name, 'gpt-4o-mini')
317
+
318
+
319
+ if __name__ == "__main__":
320
+ # Test script
321
+ print("Testing LLM Provider abstraction...")
322
+
323
+ # Test OpenAI
324
+ try:
325
+ provider = get_provider('openai')
326
+ print("✅ OpenAI provider initialized")
327
+ except Exception as e:
328
+ print(f"❌ OpenAI provider failed: {e}")
329
+
330
+ # Test Gemini
331
+ try:
332
+ provider = get_provider('gemini')
333
+ print("✅ Gemini provider initialized")
334
+ except Exception as e:
335
+ print(f"❌ Gemini provider failed: {e}")
336
+
337
+ # Test Gemma
338
+ try:
339
+ provider = get_provider('gemma')
340
+ print("✅ Gemma provider initialized")
341
+ except Exception as e:
342
+ print(f"❌ Gemma provider failed: {e}")
codes/rate_limiter.py ADDED
@@ -0,0 +1,97 @@
1
+ """
2
+ Rate Limiter for OpenAI API to avoid hitting TPM (tokens per minute) limits.
3
+ """
4
+ import time
5
+ from typing import List, Tuple
6
+
7
+ class RateLimiter:
8
+ """Smart rate limiter that tracks token usage and sleeps only when necessary."""
9
+
10
+ def __init__(self, max_tokens_per_minute: int = 95000, buffer: int = 5000):
11
+ """
12
+ Initialize the rate limiter.
13
+
14
+ Args:
15
+ max_tokens_per_minute: Maximum tokens allowed per minute (default: 95K for safety)
16
+ buffer: Safety buffer to stay under limit (default: 5K)
17
+ """
18
+ self.max_tokens = max_tokens_per_minute - buffer
19
+ self.tokens_used: List[Tuple[float, int]] = [] # [(timestamp, tokens), ...]
20
+ self.total_waits = 0
21
+ self.total_wait_time = 0.0
22
+
23
+ def wait_if_needed(self, tokens_needed: int) -> None:
24
+ """
25
+ Check if we need to wait before making the next API call.
26
+
27
+ Args:
28
+ tokens_needed: Estimated tokens for the next API call
29
+ """
30
+ now = time.time()
31
+
32
+ # Remove tokens older than 60 seconds (sliding window)
33
+ self.tokens_used = [
34
+ (ts, tok) for ts, tok in self.tokens_used
35
+ if now - ts < 60
36
+ ]
37
+
38
+ # Calculate tokens used in last 60 seconds
39
+ tokens_in_window = sum(tok for _, tok in self.tokens_used)
40
+
41
+ # If adding new request would exceed limit, wait
42
+ if tokens_in_window + tokens_needed > self.max_tokens:
43
+ # Calculate how long to wait
44
+ oldest_timestamp = self.tokens_used[0][0] if self.tokens_used else now  # guard: window may be empty
45
+ wait_time = 60 - (now - oldest_timestamp) + 1 # +1 for safety
46
+
47
+ print(f"⏰ Rate limit approaching ({tokens_in_window + tokens_needed}/{self.max_tokens} tokens)")
48
+ print(f" Waiting {wait_time:.1f}s for rate limit window to reset...")
49
+
50
+ time.sleep(wait_time)
51
+ self.total_waits += 1
52
+ self.total_wait_time += wait_time
53
+
54
+ # Clear old tokens after waiting
55
+ now = time.time()
56
+ self.tokens_used = [
57
+ (ts, tok) for ts, tok in self.tokens_used
58
+ if now - ts < 60
59
+ ]
60
+
61
+ # Record this request
62
+ self.tokens_used.append((now, tokens_needed))
63
+
64
+ def get_stats(self) -> dict:
65
+ """Get statistics about rate limiting."""
66
+ return {
67
+ 'total_waits': self.total_waits,
68
+ 'total_wait_time': self.total_wait_time,
69
+ 'current_window_tokens': sum(tok for _, tok in self.tokens_used)
70
+ }
71
+
72
+ def print_stats(self) -> None:
73
+ """Print rate limiting statistics."""
74
+ stats = self.get_stats()
75
+ print("\n" + "="*50)
76
+ print("📊 Rate Limiter Statistics")
77
+ print("="*50)
78
+ print(f"Total waits: {stats['total_waits']}")
79
+ print(f"Total wait time: {stats['total_wait_time']:.1f}s")
80
+ print(f"Current window usage: {stats['current_window_tokens']} tokens")
81
+ print("="*50 + "\n")
82
+
83
+
84
+ def estimate_tokens(text: str, overhead: int = 800) -> int:
85
+ """
86
+ Estimate tokens for a text string.
87
+
88
+ Args:
89
+ text: Input text
90
+ overhead: Additional tokens for system prompts, formatting, etc.
91
+
92
+ Returns:
93
+ Estimated token count
94
+ """
95
+ # Rough estimation: 1 token ≈ 4 characters
96
+ content_tokens = len(str(text)) // 4
97
+ return content_tokens + overhead
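The intended usage pattern is: estimate the request size, let `wait_if_needed()` sleep only when the 60-second sliding window would overflow, then make the call. A minimal sketch (the provider call itself is assumed and commented out):

```python
# Sketch: wrapping API calls with the sliding-window limiter above.
from rate_limiter import RateLimiter, estimate_tokens

limiter = RateLimiter(max_tokens_per_minute=95_000)

prompts = ["Explain attention.", "Explain positional encodings."]
for prompt in prompts:
    tokens = estimate_tokens(prompt)  # ~len(text)/4 plus fixed overhead
    limiter.wait_if_needed(tokens)    # sleeps only if the window would overflow
    # completion = provider.create_completion([...], model)  # real call goes here

limiter.print_stats()
```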
codes/test_gemma.py ADDED
@@ -0,0 +1,39 @@
1
+ """
2
+ Test script to verify Gemma provider works with real API calls
3
+ """
4
+ import os
5
+ from llm_provider import get_provider, get_default_model
6
+
7
+ # Set API key
8
+ os.environ.setdefault('NVIDIA_API_KEY', '<your-nvidia-api-key>')  # replace with your own key; never hardcode real credentials
9
+
10
+ # Initialize Gemma provider
11
+ print("Initializing Gemma provider...")
12
+ provider = get_provider('gemma')
13
+ model = get_default_model('gemma')
14
+ print(f"✅ Provider initialized with model: {model}")
15
+
16
+ # Test a simple completion
17
+ print("\nTesting completion...")
18
+ messages = [
19
+ {"role": "user", "content": "Say 'Hello, I am Gemma!' in exactly those words."}
20
+ ]
21
+
22
+ try:
23
+ completion = provider.create_completion(messages, model, max_tokens=50)
24
+ response_text = provider.get_response_text(completion)
25
+ usage = provider.get_usage_info(completion)
26
+ cost = provider.calculate_cost(usage, model)
27
+
28
+ print(f"\n✅ Completion successful!")
29
+ print(f"Response: {response_text}")
30
+ print(f"\nUsage:")
31
+ print(f" - Prompt tokens: {usage['prompt_tokens']}")
32
+ print(f" - Completion tokens: {usage['completion_tokens']}")
33
+ print(f" - Total tokens: {usage['total_tokens']}")
34
+ print(f" - Cost: ${cost:.6f}")
35
+
36
+ except Exception as e:
37
+ print(f"\n❌ Completion failed: {e}")
38
+ import traceback
39
+ traceback.print_exc()
codes/utils.py ADDED
@@ -0,0 +1,440 @@
1
+ import json
2
+ import re
3
+ import os
4
+ from datetime import datetime
5
+
6
+ def extract_planning(trajectories_json_file_path):
7
+ with open(trajectories_json_file_path) as f:
8
+ traj = json.load(f)
9
+
10
+ context_lst = []
11
+ for turn in traj:
12
+ if turn['role'] == 'assistant':
13
+ # context_lst.append(turn['content'])
14
+ content = turn['content']
15
+ if "</think>" in content:
16
+ content = content.split("</think>")[-1].strip()
17
+ context_lst.append(content)
18
+
19
+
20
+ context_lst = context_lst[:3]
21
+
22
+ return context_lst
23
+
24
+
25
+
26
+ def content_to_json(data):
27
+ clean_data = re.sub(r'\[CONTENT\]|\[/CONTENT\]', '', data).strip()
28
+
29
+ clean_data = re.sub(r'(".*?"),\s*#.*', r'\1,', clean_data)
30
+
31
+ clean_data = re.sub(r',\s*\]', ']', clean_data)
32
+
33
+ clean_data = re.sub(r'\n\s*', '', clean_data)
34
+
35
+
36
+ # JSON parsing
37
+ try:
38
+ json_data = json.loads(clean_data)
39
+ return json_data
40
+ except json.JSONDecodeError as e:
41
+ # print(e)
42
+ return content_to_json2(data)
43
+
44
+
45
+ def content_to_json2(data):
46
+ # remove [CONTENT][/CONTENT]
47
+ clean_data = re.sub(r'\[CONTENT\]|\[/CONTENT\]', '', data).strip()
48
+
49
+ # "~~~~", #comment -> "~~~~",
50
+ clean_data = re.sub(r'(".*?"),\s*#.*', r'\1,', clean_data)
51
+
52
+ # "~~~~" #comment → "~~~~"
53
+ clean_data = re.sub(r'(".*?")\s*#.*', r'\1', clean_data)
54
+
55
+
56
+ # ("~~~~",] -> "~~~~"])
57
+ clean_data = re.sub(r',\s*\]', ']', clean_data)
58
+
59
+ clean_data = re.sub(r'\n\s*', '', clean_data)
60
+
61
+ # JSON parsing
62
+ try:
63
+ json_data = json.loads(clean_data)
64
+ return json_data
65
+
66
+ except json.JSONDecodeError as e:
67
+ # print("Json parsing error", e)
68
+ return content_to_json3(data)
69
+
70
+ def content_to_json3(data):
71
+ # remove [CONTENT] [/CONTENT]
72
+ clean_data = re.sub(r'\[CONTENT\]|\[/CONTENT\]', '', data).strip()
73
+
74
+ # "~~~~", #comment -> "~~~~",
75
+ clean_data = re.sub(r'(".*?"),\s*#.*', r'\1,', clean_data)
76
+
77
+ # "~~~~" #comment → "~~~~"
78
+ clean_data = re.sub(r'(".*?")\s*#.*', r'\1', clean_data)
79
+
80
+ # remove ("~~~~",] -> "~~~~"])
81
+ clean_data = re.sub(r',\s*\]', ']', clean_data)
82
+
83
+ clean_data = re.sub(r'\n\s*', '', clean_data)
84
+ clean_data = re.sub(r'"""', '"', clean_data) # Replace triple double quotes
85
+ clean_data = re.sub(r"'''", "'", clean_data) # Replace triple single quotes
86
+ clean_data = re.sub(r"\\", "'", clean_data) # Replace \
87
+
88
+ # JSON parsing
89
+ try:
90
+ json_data = json.loads(f"""{clean_data}""")
91
+ return json_data
92
+
93
+ except json.JSONDecodeError as e:
94
+ # print(e)
95
+
96
+ # print(f"[DEBUG] utils.py > content_to_json3 ")
97
+ # return None
98
+ return content_to_json4(data)
99
+
100
+ def content_to_json4(data):
101
+ # 1. Extract Logic Analysis, Task list
102
+ pattern = r'"Logic Analysis":\s*(\[[\s\S]*?\])\s*,\s*"Task list":\s*(\[[\s\S]*?\])'
103
+ match = re.search(pattern, data)
104
+
105
+ if match:
106
+ logic_analysis = json.loads(match.group(1))
107
+ task_list = json.loads(match.group(2))
108
+
109
+ result = {
110
+ "Logic Analysis": logic_analysis,
111
+ "Task list": task_list
112
+ }
113
+ else:
114
+ result = {}
115
+
116
+ # print(json.dumps(result, indent=2))
117
+ return result
118
+
119
+ def extract_code_from_content(content):
120
+ pattern = r'^```(?:\w+)?\s*\n(.*?)(?=^```)```'
121
+ code = re.findall(pattern, content, re.DOTALL | re.MULTILINE)
122
+ if len(code) == 0:
123
+ return ""
124
+ else:
125
+ return code[0]
126
+
127
+ def extract_code_from_content2(content):
128
+ pattern = r'```python\s*(.*?)```'
129
+ result = re.search(pattern, content, re.DOTALL)
130
+
131
+ if result:
132
+ extracted_code = result.group(1).strip()
133
+ else:
134
+ extracted_code = ""
135
+ print("[WARNING] No Python code found.")
136
+ return extracted_code
137
+
138
+ def format_json_data(data):
139
+ formatted_text = ""
140
+ for key, value in data.items():
141
+ formatted_text += "-" * 40 + "\n"
142
+ formatted_text += "[" + key + "]\n"
143
+ if isinstance(value, list):
144
+ for item in value:
145
+ formatted_text += f"- {item}\n"
146
+ else:
147
+ formatted_text += str(value) + "\n"
148
+ formatted_text += "\n"
149
+ return formatted_text
150
+
151
+
152
+ def cal_cost(response_json, model_name):
153
+ model_cost = {
154
+ # OpenAI Models
155
+ "gpt-4o-mini": {"input": 0.150, "cached_input": 0.075, "output": 0.600},
156
+ "gpt-4o": {"input": 2.50, "cached_input": 1.25, "output": 10.00},
157
+
158
+ # gpt-4o-realtime-preview
159
+ "gpt-4o-realtime-preview": {"input": 5.00, "cached_input": 2.50, "output": 20.00},
160
+ "gpt-4o-realtime-preview-2024-12-17": {"input": 5.00, "cached_input": 2.50, "output": 20.00},
161
+ "gpt-4o-realtime-preview-2024-10-01": {"input": 5.00, "cached_input": 2.50, "output": 20.00},
162
+
163
+ # gpt-4o-mini
164
+ "gpt-4o-mini": {"input": 0.15, "cached_input": 0.075, "output": 0.60},
165
+ "gpt-4o-mini-2024-07-18": {"input": 0.15, "cached_input": 0.075, "output": 0.60},
166
+
167
+ # gpt-4o-mini-audio-preview
168
+ "gpt-4o-mini-audio-preview": {"input": 0.15, "cached_input": None, "output": 0.60},
169
+ "gpt-4o-mini-audio-preview-2024-12-17": {"input": 0.15, "cached_input": None, "output": 0.60},
170
+
171
+ # gpt-4o-mini-realtime-preview
172
+ "gpt-4o-mini-realtime-preview": {"input": 0.60, "cached_input": 0.30, "output": 2.40},
173
+ "gpt-4o-mini-realtime-preview-2024-12-17": {"input": 0.60, "cached_input": 0.30, "output": 2.40},
174
+
175
+ # o1
176
+ "o1": {"input": 15.00, "cached_input": 7.50, "output": 60.00},
177
+ "o1-2024-12-17": {"input": 15.00, "cached_input": 7.50, "output": 60.00},
178
+ "o1-preview-2024-09-12": {"input": 15.00, "cached_input": 7.50, "output": 60.00},
179
+
180
+ # o1-pro
181
+ "o1-pro": {"input": 150.00, "cached_input": None, "output": 600.00},
182
+ "o1-pro-2025-03-19": {"input": 150.00, "cached_input": None, "output": 600.00},
183
+
184
+ # o3
185
+ "o3": {"input": 10.00, "cached_input": 2.50, "output": 40.00},
186
+ "o3-2025-04-16": {"input": 10.00, "cached_input": 2.50, "output": 40.00},
187
+
188
+ # o4-mini
189
+ "o4-mini": {"input": 1.10, "cached_input": 0.275, "output": 4.40},
190
+ "o4-mini-2025-04-16": {"input": 1.10, "cached_input": 0.275, "output": 4.40},
191
+
192
+ # o3-mini
193
+ "o3-mini": {"input": 1.10, "cached_input": 0.55, "output": 4.40},
194
+ "o3-mini-2025-01-31": {"input": 1.10, "cached_input": 0.55, "output": 4.40},
195
+
196
+ # o1-mini
197
+ "o1-mini": {"input": 1.10, "cached_input": 0.55, "output": 4.40},
198
+ "o1-mini-2024-09-12": {"input": 1.10, "cached_input": 0.55, "output": 4.40},
199
+
200
+ # gpt-4o-mini-search-preview
201
+ "gpt-4o-mini-search-preview": {"input": 0.15, "cached_input": None, "output": 0.60},
202
+ "gpt-4o-mini-search-preview-2025-03-11": {"input": 0.15, "cached_input": None, "output": 0.60},
203
+
204
+ # gpt-4o-search-preview
205
+ "gpt-4o-search-preview": {"input": 2.50, "cached_input": None, "output": 10.00},
206
+ "gpt-4o-search-preview-2025-03-11": {"input": 2.50, "cached_input": None, "output": 10.00},
207
+
208
+ # computer-use-preview
209
+ "computer-use-preview": {"input": 3.00, "cached_input": None, "output": 12.00},
210
+ "computer-use-preview-2025-03-11": {"input": 3.00, "cached_input": None, "output": 12.00},
211
+
212
+ # gpt-image-1
213
+ "gpt-image-1": {"input": 5.00, "cached_input": None, "output": None},
214
+
215
+ # Google Gemini Models
216
+ "gemini-1.5-flash": {"input": 0.075, "cached_input": 0.01875, "output": 0.30},
217
+ "gemini-1.5-pro": {"input": 1.25, "cached_input": 0.3125, "output": 5.00},
218
+ "gemini-2.0-flash-exp": {"input": 0.0, "cached_input": 0.0, "output": 0.0},
219
+ "gemini-3-flash-preview": {"input": 0.0, "cached_input": 0.0, "output": 0.0},
220
+ "models/gemini-1.5-flash": {"input": 0.075, "cached_input": 0.01875, "output": 0.30},
221
+ "models/gemini-1.5-pro": {"input": 1.25, "cached_input": 0.3125, "output": 5.00},
222
+ "models/gemini-2.0-flash-exp": {"input": 0.0, "cached_input": 0.0, "output": 0.0},
223
+ "models/gemini-3-flash-preview": {"input": 0.0, "cached_input": 0.0, "output": 0.0},
224
+ }
225
+
226
+ # Extract token counts
227
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
228
+ completion_tokens = response_json["usage"]["completion_tokens"]
229
+
230
+ # Handle cached tokens (may not exist in all providers)
231
+ cached_tokens = 0
232
+ if "prompt_tokens_details" in response_json["usage"]:
233
+ cached_tokens = response_json["usage"]["prompt_tokens_details"].get("cached_tokens", 0)
234
+ elif "cached_tokens" in response_json["usage"]:
235
+ cached_tokens = response_json["usage"]["cached_tokens"]
236
+
237
+ # input token = (prompt_tokens - cached_tokens)
238
+ actual_input_tokens = prompt_tokens - cached_tokens
239
+ output_tokens = completion_tokens
240
+
241
+ # Get cost info with fallback for unknown models
242
+ if model_name not in model_cost:
243
+ print(f"⚠️ Warning: Unknown model '{model_name}', assuming free tier")
244
+ cost_info = {"input": 0.0, "cached_input": 0.0, "output": 0.0}
245
+ else:
246
+ cost_info = model_cost[model_name]
247
+
248
+ input_cost = (actual_input_tokens / 1_000_000) * cost_info['input']
249
+ cached_input_cost = (cached_tokens / 1_000_000) * (cost_info['cached_input'] or 0.0)  # some models list no cached-input rate
250
+ output_cost = (output_tokens / 1_000_000) * cost_info['output']
251
+
252
+ total_cost = input_cost + cached_input_cost + output_cost
253
+
254
+ return {
255
+ 'model_name': model_name,
256
+ 'actual_input_tokens': actual_input_tokens,
257
+ 'input_cost': input_cost,
258
+ 'cached_tokens': cached_tokens,
259
+ 'cached_input_cost': cached_input_cost,
260
+ 'output_tokens': output_tokens,
261
+ 'output_cost': output_cost,
262
+ 'total_cost': total_cost,
263
+ }
264
+
265
+ def load_accumulated_cost(accumulated_cost_file):
266
+ if os.path.exists(accumulated_cost_file):
267
+ with open(accumulated_cost_file, "r", encoding="utf-8") as f:
268
+ data = json.load(f)
269
+ return data.get("total_cost", 0.0)
270
+ else:
271
+ return 0.0
272
+
273
+ def save_accumulated_cost(accumulated_cost_file, cost):
274
+ with open(accumulated_cost_file, "w", encoding="utf-8") as f:
275
+ json.dump({"total_cost": cost}, f)
276
+
277
+ def print_response(completion_json, is_llm=False):
278
+ print("============================================")
279
+ if is_llm:
280
+ print(completion_json['text'])
281
+ else:
282
+ print(completion_json['choices'][0]['message']['content'])
283
+ print("============================================\n")
284
+
285
+ def print_log_cost(completion_json, gpt_version, current_stage, output_dir, total_accumulated_cost):
286
+ usage_info = cal_cost(completion_json, gpt_version)
287
+
288
+ current_cost = usage_info['total_cost']
289
+ total_accumulated_cost += current_cost
290
+
291
+ output_lines = []
292
+ output_lines.append("🌟 Usage Summary 🌟")
293
+ output_lines.append(f"{current_stage}")
294
+ output_lines.append(f"🛠️ Model: {usage_info['model_name']}")
295
+ output_lines.append(f"📥 Input tokens: {usage_info['actual_input_tokens']} (Cost: ${usage_info['input_cost']:.8f})")
296
+ output_lines.append(f"📦 Cached input tokens: {usage_info['cached_tokens']} (Cost: ${usage_info['cached_input_cost']:.8f})")
297
+ output_lines.append(f"📤 Output tokens: {usage_info['output_tokens']} (Cost: ${usage_info['output_cost']:.8f})")
298
+ output_lines.append(f"💵 Current total cost: ${current_cost:.8f}")
299
+ output_lines.append(f"🪙 Accumulated total cost so far: ${total_accumulated_cost:.8f}")
300
+ output_lines.append("============================================\n")
301
+
302
+ output_text = "\n".join(output_lines)
303
+
304
+ print(output_text)
305
+
306
+ with open(f"{output_dir}/cost_info.log", "a", encoding="utf-8") as f:
307
+ f.write(output_text + "\n")
308
+
309
+ return total_accumulated_cost
310
+
311
+
312
+ def num_tokens_from_messages(messages, model="gpt-4o-2024-08-06"):
313
+ """Return the number of tokens used by a list of messages."""
314
+ import tiktoken
315
+
316
+ try:
317
+ encoding = tiktoken.encoding_for_model(model)
318
+ except KeyError:
319
+ print("Warning: model not found. Using o200k_base encoding.")
320
+ encoding = tiktoken.get_encoding("o200k_base")
321
+ if model in {
322
+ "gpt-3.5-turbo-0125",
323
+ "gpt-4-0314",
324
+ "gpt-4-32k-0314",
325
+ "gpt-4-0613",
326
+ "gpt-4-32k-0613",
327
+ "gpt-4o-mini-2024-07-18",
328
+ "gpt-4o-2024-08-06"
329
+ }:
330
+ tokens_per_message = 3
331
+ tokens_per_name = 1
332
+ elif "gpt-3.5-turbo" in model:
333
+ print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.")
334
+ return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0125")
335
+ elif "gpt-4o-mini" in model:
336
+ print("Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.")
337
+ return num_tokens_from_messages(messages, model="gpt-4o-mini-2024-07-18")
338
+ elif "gpt-4o" in model:
339
+ print("Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.")
340
+ return num_tokens_from_messages(messages, model="gpt-4o-2024-08-06")
341
+
342
+ elif "gpt-4" in model:
343
+ print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
344
+ return num_tokens_from_messages(messages, model="gpt-4-0613")
345
+ else:
346
+ raise NotImplementedError(
347
+ f"""num_tokens_from_messages() is not implemented for model {model}."""
348
+ )
349
+ num_tokens = 0
350
+ for message in messages:
351
+ num_tokens += tokens_per_message
352
+ for key, value in message.items():
353
+ # num_tokens += len(encoding.encode(value)
354
+ num_tokens += len(encoding.encode(value, allowed_special={"<|endoftext|>"},disallowed_special=()))
355
+
356
+ if key == "name":
357
+ num_tokens += tokens_per_name
358
+ num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
359
+ return num_tokens
360
+
361
+
362
+
363
+ def read_all_files(directory, allowed_ext, is_print=True):
364
+ """Recursively read all .py files in the specified directory and return their contents."""
365
+ all_files_content = {}
366
+
367
+ for root, _, files in os.walk(directory): # Recursively traverse directories
368
+ for filename in files:
369
+ relative_path = os.path.relpath(os.path.join(root, filename), directory) # Preserve directory structure
370
+
371
+ # print(f"fn: {filename}\tdirectory: {directory}")
372
+ _file_name, ext = os.path.splitext(filename)
373
+
374
+ is_skip = False
375
+ if len(directory) < len(root):
376
+ root2 = root[len(directory)+1:]
377
+ for dirname in root2.split(os.sep):  # cross-platform path split
378
+ if dirname.startswith("."):
379
+ is_skip = True
380
+ break
381
+
382
+ if filename.startswith(".") or "requirements.txt" in filename or ext == "" or is_skip:
383
+ if is_print and ext == "":
384
+ print(f"[SKIP] {os.path.join(root, filename)}")
385
+ continue
386
+
387
+ if ext not in allowed_ext:
388
+ if _file_name.lower() != "readme":
389
+ if is_print:
390
+ print(f"[SKIP] {os.path.join(root, filename)}")
391
+ continue
392
+
393
+ try:
394
+ filepath = os.path.join(root, filename)
395
+ file_size = os.path.getsize(filepath) # bytes
396
+
397
+ if file_size > 204800: # > 200KB
398
+ print(f"[BIG] {filepath} {file_size}")
399
+
400
+ with open(filepath, "r") as file: # encoding="utf-8"
401
+ all_files_content[relative_path] = file.read()
402
+ except Exception as e:
403
+ print(e)
404
+ print(f"[SKIP] {os.path.join(root, filename)}")
405
+
406
+
407
+ return all_files_content
408
+
409
+ def read_python_files(directory):
410
+ """Recursively read all .py files in the specified directory and return their contents."""
411
+ python_files_content = {}
412
+
413
+ for root, _, files in os.walk(directory): # Recursively traverse directories
414
+ for filename in files:
415
+ if filename.endswith(".py"): # Check if file has .py extension
416
+ relative_path = os.path.relpath(os.path.join(root, filename), directory) # Preserve directory structure
417
+ with open(os.path.join(root, filename), "r", encoding="utf-8") as file:
418
+ python_files_content[relative_path] = file.read()
419
+
420
+ return python_files_content
421
+
422
+
423
+ def extract_json_from_string(text):
424
+ # Extract content inside ```json\n...\n```
425
+ match = re.search(r"```json\n(.*?)\n```", text, re.DOTALL)
426
+
427
+ if match:
428
+ json_content = match.group(1)
429
+ return json_content
430
+ else:
431
+ print("No JSON content found.")
432
+ return ""
433
+
434
+
435
+ def get_now_str():
436
+ now = datetime.now()
437
+ now = str(now)
438
+ now = now.split(".")[0]
439
+ now = now.replace("-","").replace(" ","_").replace(":","")
440
+ return now  # e.g. "20250427_205124"
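To make the JSON-repair cascade concrete, a small sketch exercising `content_to_json` and `extract_code_from_content2` on invented inputs: the `[CONTENT]` wrapper and the trailing comma are stripped before parsing.

```python
# Sketch: exercising the JSON-repair cascade and code extraction above
# on deliberately messy inputs (both invented for illustration).
from utils import content_to_json, extract_code_from_content2

raw = """[CONTENT]
{
    "Task list": ["utils.py", "main.py",]
}
[/CONTENT]"""

print(content_to_json(raw))
# -> {'Task list': ['utils.py', 'main.py']}

# Build a fenced snippet programmatically to avoid literal backticks here.
fence = "`" * 3
snippet = f"{fence}python\nprint('hi')\n{fence}"
print(extract_code_from_content2(snippet))
# -> print('hi')
```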
main.py ADDED
@@ -0,0 +1,136 @@
1
+ import os, sys, shutil, tempfile, zipfile, asyncio, subprocess
2
+ from pathlib import Path
3
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.responses import FileResponse
6
+
7
+ REPO_ROOT = Path(__file__).parent.resolve()
8
+ CODES_DIR = REPO_ROOT / "codes"
9
+
10
+ app = FastAPI(title="Blog2Code API", version="1.0.0")
11
+ ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "*").split(",")
12
+ app.add_middleware(
13
+ CORSMiddleware,
14
+ allow_origins=ALLOWED_ORIGINS,
15
+ allow_methods=["*"],
16
+ allow_headers=["*"],
17
+ )
18
+
19
+ def _run(script: str, args: list, extra_env: dict) -> None:
20
+ cmd = [sys.executable, str(CODES_DIR / script)] + args
21
+ result = subprocess.run(
22
+ cmd,
23
+ cwd=str(REPO_ROOT),
24
+ env={**os.environ, **extra_env},
25
+ capture_output=True,
26
+ text=True,
27
+ )
28
+ if result.returncode != 0:
29
+ raise RuntimeError(
30
+ f"{script} failed (exit {result.returncode}):\n"
31
+ f"STDOUT: {result.stdout[-2000:]}\n"
32
+ f"STDERR: {result.stderr[-2000:]}"
33
+ )
34
+
35
+ @app.get("/health")
36
+ def health():
37
+ return {"status": "ok"}
38
+
39
+ @app.post("/generate")
40
+ async def generate(
41
+ url: str = Form(None),
42
+ file: UploadFile = File(None),
43
+ ):
44
+ if not url and not file:
45
+ raise HTTPException(400, "Provide either 'url' or 'file'.")
46
+
47
+ tmp = Path(tempfile.mkdtemp())
48
+ data_dir = tmp / "data"
49
+ output_dir = tmp / "output"
50
+ data_dir.mkdir(parents=True)
51
+ output_dir.mkdir(parents=True)
52
+
53
+ try:
54
+ if file:
55
+ suffix = Path(file.filename).suffix or ".md"
56
+ input_path = tmp / f"blog{suffix}"
57
+ input_path.write_bytes(await file.read())
58
+ source_args = ["--input_path", str(input_path)]
59
+ else:
60
+ source_args = ["--url", url.strip()]
61
+
62
+ provider = os.getenv("PROVIDER", "gemini")
63
+ model = os.getenv("MODEL", "")
64
+ extra_env = {"MODEL": model} if model else {}
65
+
66
+ blog_json = data_dir / "blog_data.json"
67
+
68
+ def run_pipeline():
69
+ # Stage 0 – parse blog
70
+ _run("0_blog_process.py",
71
+ source_args + ["--output_json_path", str(blog_json)],
72
+ extra_env)
73
+
74
+ if not blog_json.exists():
75
+ candidates = list(data_dir.glob("*.json"))
76
+ if not candidates:
77
+ raise RuntimeError("Stage 0: no JSON output found.")
78
+ blog_json_path = candidates[0]
79
+ else:
80
+ blog_json_path = blog_json
81
+
82
+ # Stage 1 – planning
83
+ _run("1_planning.py", [
84
+ "--blog_json_path", str(blog_json_path),
85
+ "--output_dir", str(data_dir),
86
+ "--provider", provider,
87
+ "--content_type", "blog",
88
+ ], extra_env)
89
+
90
+ # Stage 1.1 – extract config
91
+ _run("1_1_extract_config.py", [
92
+ "--output_dir", str(data_dir),
93
+ ], extra_env)
94
+
95
+ config_yaml = data_dir / "planning_config.yaml"
96
+ if not config_yaml.exists():
97
+ raise RuntimeError("Stage 1.1: planning_config.yaml not found.")
98
+
99
+ # Stage 2 – analysis
100
+ _run("2_analyzing.py", [
101
+ "--pdf_json_path", str(blog_json_path),
102
+ "--output_dir", str(data_dir),
103
+ "--provider", provider,
104
+ ], extra_env)
105
+
106
+ # Stage 3 – code generation
107
+ _run("3_coding.py", [
108
+ "--pdf_json_path", str(blog_json_path),
109
+ "--output_dir", str(data_dir),
110
+ "--output_repo_dir", str(output_dir),
111
+ "--provider", provider,
112
+ ], extra_env)
113
+
114
+ await asyncio.get_running_loop().run_in_executor(None, run_pipeline)  # get_event_loop() is deprecated inside coroutines
115
+
116
+ zip_path = tmp / "repo.zip"
117
+ files = [f for f in output_dir.rglob("*") if f.is_file()]
118
+ if not files:
119
+ raise HTTPException(500, "Pipeline produced no output files.")
120
+
121
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
122
+ for f in files:
123
+ zf.write(f, f.relative_to(output_dir))
124
+
125
+ return FileResponse(
126
+ path=str(zip_path),
127
+ media_type="application/zip",
128
+ filename="generated-repo.zip",
129
+ )
130
+
131
+ except HTTPException:
132
+ shutil.rmtree(tmp, ignore_errors=True)
133
+ raise
134
+ except Exception as exc:
135
+ shutil.rmtree(tmp, ignore_errors=True)
136
+ raise HTTPException(500, str(exc)) from exc
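For reference, a hedged sketch of exercising the service once it is running; the host and port are assumptions based on the container's `uvicorn` command, and `requests` is already in `requirements.txt`:

```python
# Sketch: calling the Blog2Code API from a client.
# Host/port are assumptions; adjust to your deployment.
import requests

BASE = "http://localhost:7860"

print(requests.get(f"{BASE}/health", timeout=10).json())  # {'status': 'ok'}

# Generate a repo from a blog URL; the response body is a zip archive.
resp = requests.post(
    f"{BASE}/generate",
    data={"url": "https://example.com/my-blog-post"},  # illustrative URL
    timeout=1800,  # the pipeline runs several LLM stages
)
resp.raise_for_status()
with open("generated-repo.zip", "wb") as f:
    f.write(resp.content)

# Alternatively, upload a local markdown file:
with open("my_post.md", "rb") as fh:  # illustrative filename
    resp = requests.post(f"{BASE}/generate", files={"file": fh}, timeout=1800)
```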
requirements.txt ADDED
@@ -0,0 +1,13 @@
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ openai>=1.65.4
5
+ tiktoken>=0.9.0
6
+ google-generativeai>=0.8.0
7
+ beautifulsoup4>=4.12.0
8
+ requests>=2.31.0
9
+ markdown>=3.5.0
10
+ html2text>=2020.1.16
11
+ lxml>=5.0.0
12
+ tqdm>=4.60.0
13
+ pyyaml>=6.0