Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

akseljoonas HF Staff committed 27 days ago

Commit

54049e9

1 Parent(s): ee20af8

Remove stale tests directory

Browse files

Files changed (8) hide show

tests/__init__.py +0 -0
tests/integration/__init__.py +0 -0
tests/integration/tools/__init__.py +0 -0
tests/integration/tools/test_jobs_integration.py +0 -452
tests/integration/tools/test_papers_integration.py +0 -572
tests/unit/__init__.py +0 -0
tests/unit/tools/__init__.py +0 -0
tests/unit/tools/test_jobs_tool.py +0 -537

tests/__init__.py DELETED Viewed

File without changes

tests/integration/__init__.py DELETED Viewed

File without changes

tests/integration/tools/__init__.py DELETED Viewed

File without changes

tests/integration/tools/test_jobs_integration.py DELETED Viewed

@@ -1,452 +0,0 @@
-#!/usr/bin/env python3
-"""
-Integration tests for refactored HF Jobs Tool
-Tests with real HF API using HF_TOKEN from environment
-"""
-import os
-import sys
-import asyncio
-import time
-# Add parent directory to path
-sys.path.insert(0, '.')
-from agent.tools.jobs_tool import HfJobsTool
-# ANSI color codes for better output.
-# Each constant is an SGR escape sequence; RESET must follow a colored span
-# so later terminal output returns to the default attributes.
-GREEN = '\033[92m'
-YELLOW = '\033[93m'
-RED = '\033[91m'
-BLUE = '\033[94m'
-RESET = '\033[0m'
-def print_test(msg):
-    """Announce a test step: a blue ``[TEST]`` tag followed by *msg*."""
-    tag = f"{BLUE}[TEST]{RESET}"
-    print(f"{tag} {msg}")
-def print_success(msg):
-    """Report a passed step: a green check mark followed by *msg*."""
-    mark = f"{GREEN}✓{RESET}"
-    print(f"{mark} {msg}")
-def print_warning(msg):
-    """Report a non-fatal problem: a yellow warning sign followed by *msg*."""
-    mark = f"{YELLOW}⚠{RESET}"
-    print(f"{mark} {msg}")
-def print_error(msg):
-    """Report a failed step: a red cross followed by *msg*."""
-    mark = f"{RED}✗{RESET}"
-    print(f"{mark} {msg}")
-async def test_basic_job_run(tool):
-    """Launch a detached one-line Python job through the ``run`` operation.
-    Returns the job ID parsed from the tool's formatted response, or None when
-    the tool reports an error or no ID can be found in the response.
-    """
-    import re
-    print_test("Running a simple Python job...")
-    job_args = {
-        "image": "python:3.12",
-        "command": ["python", "-c", "print('Hello from HF Jobs!')"],
-        "flavor": "cpu-basic",
-        "timeout": "5m",
-        "detach": True,  # Don't wait for completion
-    }
-    response = await tool.execute({"operation": "run", "args": job_args})
-    if response.get("isError"):
-        print_error(f"Failed to run job: {response['formatted']}")
-        return None
-    # The ID is read from the "**Job ID:** <id>" fragment of the response.
-    found = re.search(r'\*\*Job ID:\*\* (\S+)', response['formatted'])
-    if found is None:
-        print_error("Could not extract job ID from response")
-        return None
-    job_id = found.group(1)
-    print_success(f"Job started with ID: {job_id}")
-    return job_id
-async def test_list_jobs(tool):
-    """Run the ``ps`` operation and report how many jobs are running.
-    Returns True on success, False when the tool reports an error.
-    """
-    print_test("Listing running jobs...")
-    request = {"operation": "ps", "args": {}}
-    response = await tool.execute(request)
-    if response.get("isError"):
-        print_error(f"Failed to list jobs: {response['formatted']}")
-        return False
-    running = response['totalResults']
-    print_success(f"Listed jobs: {running} running")
-    if running > 0:
-        # Only a 200-character preview of the listing is echoed.
-        print(f"   {response['formatted'][:200]}...")
-    return True
-async def test_inspect_job(tool, job_id):
-    """Run the ``inspect`` operation for a single job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        job_id: ID of the job to inspect.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Inspecting job {job_id}...")
-    result = await tool.execute({
-        "operation": "inspect",
-        "args": {"job_id": job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to inspect job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Inspected job successfully")
-    return True
-async def test_get_logs(tool, job_id):
-    """Run the ``logs`` operation for a job after a short delay.
-    A tool error is reported as a warning rather than an error because the
-    logs may simply not be available yet.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        job_id: ID of the job whose logs are fetched.
-    Returns:
-        True when logs were fetched, False when the tool reports an error.
-    """
-    print_test(f"Fetching logs for job {job_id}...")
-    # Wait a bit for logs to be available
-    await asyncio.sleep(2)
-    result = await tool.execute({
-        "operation": "logs",
-        "args": {"job_id": job_id},
-    })
-    if result.get("isError"):
-        print_warning(f"Could not fetch logs (might be too early): {result['formatted'][:100]}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Fetched logs successfully")
-    # Finding the marker is a bonus; its absence does not fail the step.
-    if "Hello from HF Jobs!" in result['formatted']:
-        print_success("  Found expected output in logs!")
-    return True
-async def test_cancel_job(tool, job_id):
-    """Run the ``cancel`` operation for a single job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        job_id: ID of the job to cancel.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Cancelling job {job_id}...")
-    result = await tool.execute({
-        "operation": "cancel",
-        "args": {"job_id": job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to cancel job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Cancelled job successfully")
-    return True
-async def test_uv_job(tool):
-    """Submit a detached inline script through the ``uv`` operation.
-    Returns the job ID parsed from the tool's formatted response, or None when
-    the tool reports an error or no ID can be found in the response.
-    """
-    import re
-    print_test("Running a UV Python script job...")
-    uv_args = {
-        "script": "print('Hello from UV!')\nimport sys\nprint(f'Python version: {sys.version}')",
-        "flavor": "cpu-basic",
-        "timeout": "5m",
-        "detach": True,
-    }
-    response = await tool.execute({"operation": "uv", "args": uv_args})
-    if response.get("isError"):
-        print_error(f"Failed to run UV job: {response['formatted']}")
-        return None
-    # The ID is read from the "UV Job started: <id>" fragment of the response.
-    found = re.search(r'UV Job started: (\S+)', response['formatted'])
-    if found is None:
-        print_error("Could not extract job ID from response")
-        return None
-    job_id = found.group(1)
-    print_success(f"UV job started with ID: {job_id}")
-    return job_id
-async def test_list_all_jobs(tool):
-    """Run ``ps`` with ``all=True`` so completed jobs are listed as well.
-    Returns True on success, False when the tool reports an error.
-    """
-    print_test("Listing all jobs (including completed)...")
-    request = {"operation": "ps", "args": {"all": True}}
-    response = await tool.execute(request)
-    if response.get("isError"):
-        print_error(f"Failed to list all jobs: {response['formatted']}")
-        return False
-    total = response['totalResults']
-    print_success(f"Listed all jobs: {total} total")
-    return True
-async def test_scheduled_job(tool):
-    """Create an ``@daily`` scheduled job through ``scheduled run``.
-    Returns the scheduled job ID parsed from the tool's formatted response, or
-    None when the tool reports an error or no ID can be found.
-    """
-    import re
-    print_test("Creating a scheduled job (daily at midnight)...")
-    schedule_args = {
-        "image": "python:3.12",
-        "command": ["python", "-c", "print('Scheduled job running!')"],
-        "schedule": "@daily",
-        "flavor": "cpu-basic",
-        "timeout": "5m",
-    }
-    response = await tool.execute({"operation": "scheduled run", "args": schedule_args})
-    if response.get("isError"):
-        print_error(f"Failed to create scheduled job: {response['formatted']}")
-        return None
-    # The ID is read from the "**Scheduled Job ID:** <id>" fragment.
-    found = re.search(r'\*\*Scheduled Job ID:\*\* (\S+)', response['formatted'])
-    if found:
-        scheduled_job_id = found.group(1)
-        print_success(f"Scheduled job created with ID: {scheduled_job_id}")
-        return scheduled_job_id
-    print_error("Could not extract scheduled job ID")
-    return None
-async def test_list_scheduled_jobs(tool):
-    """Run ``scheduled ps`` and report the number of active scheduled jobs.
-    Returns True on success, False when the tool reports an error.
-    """
-    print_test("Listing scheduled jobs...")
-    request = {"operation": "scheduled ps", "args": {}}
-    response = await tool.execute(request)
-    if response.get("isError"):
-        print_error(f"Failed to list scheduled jobs: {response['formatted']}")
-        return False
-    active = response['totalResults']
-    print_success(f"Listed scheduled jobs: {active} active")
-    return True
-async def test_inspect_scheduled_job(tool, scheduled_job_id):
-    """Run the ``scheduled inspect`` operation for one scheduled job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        scheduled_job_id: ID of the scheduled job to inspect.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Inspecting scheduled job {scheduled_job_id}...")
-    result = await tool.execute({
-        "operation": "scheduled inspect",
-        "args": {"scheduled_job_id": scheduled_job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to inspect scheduled job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Inspected scheduled job successfully")
-    return True
-async def test_suspend_scheduled_job(tool, scheduled_job_id):
-    """Run the ``scheduled suspend`` operation for one scheduled job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        scheduled_job_id: ID of the scheduled job to suspend.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Suspending scheduled job {scheduled_job_id}...")
-    result = await tool.execute({
-        "operation": "scheduled suspend",
-        "args": {"scheduled_job_id": scheduled_job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to suspend scheduled job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Suspended scheduled job successfully")
-    return True
-async def test_resume_scheduled_job(tool, scheduled_job_id):
-    """Run the ``scheduled resume`` operation for one scheduled job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        scheduled_job_id: ID of the scheduled job to resume.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Resuming scheduled job {scheduled_job_id}...")
-    result = await tool.execute({
-        "operation": "scheduled resume",
-        "args": {"scheduled_job_id": scheduled_job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to resume scheduled job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Resumed scheduled job successfully")
-    return True
-async def test_delete_scheduled_job(tool, scheduled_job_id):
-    """Run the ``scheduled delete`` operation for one scheduled job.
-    Args:
-        tool: Jobs tool exposing an awaitable ``execute(request)``.
-        scheduled_job_id: ID of the scheduled job to delete.
-    Returns:
-        True on success, False when the tool reports an error.
-    """
-    print_test(f"Deleting scheduled job {scheduled_job_id}...")
-    result = await tool.execute({
-        "operation": "scheduled delete",
-        "args": {"scheduled_job_id": scheduled_job_id},
-    })
-    if result.get("isError"):
-        print_error(f"Failed to delete scheduled job: {result['formatted']}")
-        return False
-    # Plain literal: the message has no placeholders, so no f-string is needed.
-    print_success("Deleted scheduled job successfully")
-    return True
-async def main():
-    """Run every integration test against the real HF API.
-    Requires HF_TOKEN in the environment. Exits with status 1 when the token
-    is missing, when any recorded operation reports failure, or when an
-    unexpected exception interrupts the run. Jobs and scheduled jobs that were
-    created but not successfully cancelled/deleted are cleaned up on a
-    best-effort basis before the summary is printed.
-    """
-    import traceback
-    rule = "=" * 70
-    print(rule)
-    print(f"{BLUE}HF Jobs Tool - Integration Tests{RESET}")
-    print(rule)
-    print()
-    # Check for HF_TOKEN
-    hf_token = os.environ.get('HF_TOKEN')
-    if not hf_token:
-        print_error("HF_TOKEN not found in environment variables!")
-        print_warning("Set it with: export HF_TOKEN='your_token_here'")
-        sys.exit(1)
-    print_success(f"Found HF_TOKEN (length: {len(hf_token)})")
-    print()
-    # Initialize tool with token
-    tool = HfJobsTool(hf_token=hf_token)
-    # IDs of remote resources that still exist and need cleanup
-    job_ids = []
-    scheduled_job_ids = []
-    # Names of operations whose test step reported failure
-    failures = []
-    # Log fetching only warns on failure, so it is tracked separately
-    logs_verified = True
-    def record(operation, ok):
-        """Note *operation* as failed when *ok* is falsy; return bool(ok)."""
-        if not ok:
-            failures.append(operation)
-        return bool(ok)
-    try:
-        # Test Suite 1: Regular Jobs
-        print(f"\n{YELLOW}{rule}{RESET}")
-        print(f"{YELLOW}Test Suite 1: Regular Jobs{RESET}")
-        print(f"{YELLOW}{rule}{RESET}\n")
-        job_id = await test_basic_job_run(tool)
-        if record("run", job_id):
-            job_ids.append(job_id)
-            # Wait a moment for job to register
-            await asyncio.sleep(1)
-            record("ps", await test_list_jobs(tool))
-            record("inspect", await test_inspect_job(tool, job_id))
-            # Logs may legitimately not be ready yet, so a miss is not fatal.
-            logs_verified = await test_get_logs(tool, job_id)
-            if record("cancel", await test_cancel_job(tool, job_id)):
-                job_ids.remove(job_id)
-        # UV job
-        print()
-        uv_job_id = await test_uv_job(tool)
-        if record("uv", uv_job_id):
-            job_ids.append(uv_job_id)
-            await asyncio.sleep(1)
-            if record("cancel (uv job)", await test_cancel_job(tool, uv_job_id)):
-                job_ids.remove(uv_job_id)
-        # List all jobs
-        print()
-        record("ps (all)", await test_list_all_jobs(tool))
-        # Test Suite 2: Scheduled Jobs
-        print(f"\n{YELLOW}{rule}{RESET}")
-        print(f"{YELLOW}Test Suite 2: Scheduled Jobs{RESET}")
-        print(f"{YELLOW}{rule}{RESET}\n")
-        scheduled_job_id = await test_scheduled_job(tool)
-        if record("scheduled run", scheduled_job_id):
-            scheduled_job_ids.append(scheduled_job_id)
-            # Wait a moment for job to register
-            await asyncio.sleep(1)
-            record("scheduled ps", await test_list_scheduled_jobs(tool))
-            print()
-            record("scheduled inspect", await test_inspect_scheduled_job(tool, scheduled_job_id))
-            print()
-            record("scheduled suspend", await test_suspend_scheduled_job(tool, scheduled_job_id))
-            print()
-            record("scheduled resume", await test_resume_scheduled_job(tool, scheduled_job_id))
-            print()
-            # Cleanup: Delete scheduled job
-            if record("scheduled delete", await test_delete_scheduled_job(tool, scheduled_job_id)):
-                scheduled_job_ids.remove(scheduled_job_id)
-    except Exception as e:
-        print_error(f"Test failed with exception: {str(e)}")
-        traceback.print_exc()
-        failures.append("unexpected exception")
-    finally:
-        # Best-effort cleanup of anything still alive. ``except Exception``
-        # (not a bare ``except``) lets KeyboardInterrupt and task cancellation
-        # propagate instead of being silently swallowed.
-        if job_ids or scheduled_job_ids:
-            print(f"\n{YELLOW}Attempting cleanup...{RESET}")
-        for leftover in list(job_ids):
-            try:
-                await test_cancel_job(tool, leftover)
-            except Exception as cleanup_error:
-                print_warning(f"Cleanup of job {leftover} failed: {cleanup_error}")
-        for leftover in list(scheduled_job_ids):
-            try:
-                await test_delete_scheduled_job(tool, leftover)
-            except Exception as cleanup_error:
-                print_warning(f"Cleanup of scheduled job {leftover} failed: {cleanup_error}")
-    # Final summary
-    print(f"\n{YELLOW}{rule}{RESET}")
-    if failures:
-        print_error(f"{len(failures)} operation(s) failed: {', '.join(failures)}")
-        print(f"{YELLOW}{rule}{RESET}\n")
-        sys.exit(1)
-    print(f"{GREEN}✓ All integration tests completed!{RESET}")
-    print(f"{YELLOW}{rule}{RESET}\n")
-    print_success("Refactored implementation works correctly with real HF API")
-    if logs_verified:
-        print_success("All 13 operations tested and verified")
-    else:
-        print_warning("Logs could not be fetched; that operation was not verified")
-    print()
-    print(f"{BLUE}Summary:{RESET}")
-    print("  • Regular jobs: ✓ run, list, inspect, logs, cancel")
-    print("  • UV jobs: ✓ run")
-    print("  • Scheduled jobs: ✓ create, list, inspect, suspend, resume, delete")
-    print()
-# Script entry point: run the async test driver when executed directly.
-if __name__ == "__main__":
-    asyncio.run(main())

tests/integration/tools/test_papers_integration.py DELETED Viewed

@@ -1,572 +0,0 @@
-#!/usr/bin/env python3
-"""
-Integration tests for HF Papers Tool
-Tests with real HF and arXiv APIs — all endpoints are public, no auth required.
-Run: python tests/integration/tools/test_papers_integration.py
-"""
-import asyncio
-import re
-import sys
-sys.path.insert(0, ".")
-from agent.tools.papers_tool import hf_papers_handler
-# ANSI color codes (SGR escape sequences); RESET restores default attributes.
-GREEN = "\033[92m"
-YELLOW = "\033[93m"
-RED = "\033[91m"
-BLUE = "\033[94m"
-DIM = "\033[2m"
-RESET = "\033[0m"
-# Running totals, incremented by assert_true() and reported by main().
-assertions_passed = 0
-assertions_failed = 0
-def print_test(msg):
-    """Print a test title framed above and below by a blue horizontal rule."""
-    rule = f"{BLUE}{'─' * 70}{RESET}"
-    print(f"\n{rule}")
-    print(f"{BLUE}[TEST]{RESET} {msg}")
-    print(rule)
-def print_success(msg):
-    """Print *msg* in green, indented and prefixed with a check mark."""
-    line = f"  ✓ {msg}"
-    print(f"{GREEN}{line}{RESET}")
-def print_error(msg):
-    """Print *msg* in red, indented and prefixed with a cross."""
-    line = f"  ✗ {msg}"
-    print(f"{RED}{line}{RESET}")
-def print_output(output: str, max_lines: int = 40):
-    """Echo tool output behind a dim gutter, showing at most *max_lines* lines.
-    When lines are cut off, a final gutter line states how many were hidden.
-    """
-    rows = output.split("\n")
-    for row in rows[:max_lines]:
-        print(f"{DIM}  │ {RESET}{row}")
-    hidden = len(rows) - max_lines
-    if hidden > 0:
-        print(f"{DIM}  │ ... ({hidden} more lines){RESET}")
-def assert_true(condition: bool, msg: str) -> bool:
-    """Record one assertion outcome, print it, and report whether it passed.
-    Increments the module-level pass/fail counter that matches the outcome.
-    """
-    global assertions_passed, assertions_failed
-    if not condition:
-        print_error(msg)
-        assertions_failed += 1
-        return False
-    print_success(msg)
-    assertions_passed += 1
-    return True
-async def run(args: dict) -> tuple[str, bool]:
-    """Forward *args* to ``hf_papers_handler`` and return its result unchanged."""
-    outcome = await hf_papers_handler(args)
-    return outcome
-# ---------------------------------------------------------------------------
-# Test Suite 1: Paper Discovery
-# ---------------------------------------------------------------------------
-async def test_trending():
-    """Check that ``trending`` with limit=3 returns three well-formed entries.
-    Returns:
-        True when every assertion passed, False otherwise.
-    """
-    print_test("trending (limit=3)")
-    output, success = await run({"operation": "trending", "limit": 3})
-    print_output(output)
-    ok = True
-    ok &= assert_true(success, "success=True")
-    ok &= assert_true("# Trending Papers" in output, "has '# Trending Papers' heading")
-    ok &= assert_true("Showing 3 paper(s)" in output, "shows exactly 3 papers")
-    # Check that each paper has an arxiv_id line
-    arxiv_ids = re.findall(r"\*\*arxiv_id:\*\* (\S+)", output)
-    ok &= assert_true(len(arxiv_ids) == 3, f"found 3 arxiv IDs: {arxiv_ids}")
-    # Check that IDs look valid. fullmatch (rather than match) rejects trailing
-    # junk after the ID; an optional version suffix such as "v2" is accepted.
-    for aid in arxiv_ids:
-        ok &= assert_true(
-            re.fullmatch(r"\d{4}\.\d{4,5}(v\d+)?", aid) is not None,
-            f"arxiv_id '{aid}' looks valid (NNNN.NNNNN format)",
-        )
-    # Check each paper has an HF URL
-    hf_urls = re.findall(r"https://huggingface\.co/papers/\S+", output)
-    ok &= assert_true(len(hf_urls) == 3, "found 3 HF paper URLs")
-    return ok
-async def test_trending_with_query():
-    """Check ``trending`` with a keyword filter; True if every assertion passed."""
-    print_test("trending with query='language' (limit=5)")
-    output, success = await run({"operation": "trending", "query": "language", "limit": 5})
-    print_output(output)
-    ok = True
-    ok &= assert_true(success, "success=True")
-    ok &= assert_true("Filtered by: 'language'" in output, "shows filter applied")
-    # The filter may return 0-5 results depending on today's papers
-    match = re.search(r"Showing (\d+) paper\(s\)", output)
-    ok &= assert_true(match is not None, "has 'Showing N paper(s)' line")
-    if match:
-        count = int(match.group(1))
-        ok &= assert_true(count <= 5, f"returned {count} papers (within limit)")
-        # Informational only: the results are not checked for the keyword, and
-        # this message is not counted in the assertion totals.
-        if count > 0:
-            print_success(f"got {count} filtered results")
-    return ok
-async def test_search():
-    """Search for DPO papers and expect three relevant hits.
-    Returns True when every assertion passed.
-    """
-    print_test("search 'direct preference optimization' (limit=3)")
-    request = {"operation": "search", "query": "direct preference optimization", "limit": 3}
-    output, success = await run(request)
-    print_output(output)
-    found_ids = re.findall(r"\*\*arxiv_id:\*\* (\S+)", output)
-    # Every check is evaluated (and printed) before the verdict is combined.
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("Papers matching" in output, "has matching header"),
-        assert_true(len(found_ids) == 3, f"found 3 results: {found_ids}"),
-        # At least one result should mention "preference" in title or summary
-        assert_true(
-            "preference" in output.lower(),
-            "results mention 'preference' (relevant to query)",
-        ),
-    ]
-    return all(checks)
-async def test_paper_details():
-    """Fetch details for the DPO paper and check the expected fields and hints.
-    Returns True when every assertion passed.
-    """
-    print_test("paper_details for 2305.18290 (DPO paper)")
-    output, success = await run({"operation": "paper_details", "arxiv_id": "2305.18290"})
-    print_output(output)
-    has_summary = "## Abstract" in output or "## AI Summary" in output
-    # Every check is evaluated (and printed) before the verdict is combined.
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("Direct Preference Optimization" in output, "title contains 'Direct Preference Optimization'"),
-        assert_true("2305.18290" in output, "contains arxiv_id"),
-        assert_true("https://arxiv.org/abs/2305.18290" in output, "has arxiv URL"),
-        assert_true("https://huggingface.co/papers/2305.18290" in output, "has HF URL"),
-        assert_true("**Authors:**" in output, "has authors section"),
-        assert_true("**upvotes:**" in output, "has upvotes"),
-        # Either an abstract or an AI summary is acceptable
-        assert_true(has_summary, "has Abstract or AI Summary section"),
-        # Next-step hints
-        assert_true("read_paper" in output, "mentions read_paper as next step"),
-        assert_true("find_all_resources" in output, "mentions find_all_resources as next step"),
-    ]
-    return all(checks)
-# ---------------------------------------------------------------------------
-# Test Suite 2: Read Paper
-# ---------------------------------------------------------------------------
-async def test_read_paper_toc():
-    """Read the DPO paper without a section and expect abstract plus TOC.
-    Returns True when every assertion passed.
-    """
-    print_test("read_paper TOC for 2305.18290 (no section → should return abstract + sections)")
-    output, success = await run({"operation": "read_paper", "arxiv_id": "2305.18290"})
-    print_output(output)
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("## Abstract" in output, "has Abstract section"),
-        assert_true("## Sections" in output, "has Sections heading (TOC)"),
-    ]
-    # TOC entries are expected as "- **Title**:" bullet items
-    titles = re.findall(r"- \*\*(.+?)\*\*:", output)
-    checks.append(
-        assert_true(len(titles) >= 5, f"found {len(titles)} sections (expect >=5 for a full paper)")
-    )
-    if titles:
-        print_success(f"sections found: {titles[:5]}{'...' if len(titles) > 5 else ''}")
-    # Expected DPO paper sections
-    joined_titles = " ".join(titles).lower()
-    checks.append(assert_true("introduction" in joined_titles, "'Introduction' section present"))
-    checks.append(assert_true("experiment" in joined_titles, "'Experiment' section present"))
-    # Tip about reading specific sections
-    checks.append(assert_true("section=" in output, "has tip about using section parameter"))
-    # The abstract must carry real content, not just a heading
-    abstract = re.search(r"## Abstract\n(.+?)(?:\n##|\n\*\*Tip)", output, re.DOTALL)
-    if abstract is None:
-        checks.append(assert_true(False, "could extract abstract text"))
-    else:
-        body = abstract.group(1).strip()
-        checks.append(assert_true(len(body) > 100, f"abstract has real content ({len(body)} chars)"))
-    return all(checks)
-async def test_read_paper_section_by_number():
-    """Read section "4" of the DPO paper and expect one substantial section.
-    Returns True when every assertion passed.
-    """
-    print_test("read_paper section='4' for 2305.18290")
-    request = {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "4"}
-    output, success = await run(request)
-    print_output(output, max_lines=30)
-    size = len(output)
-    # Every check is evaluated (and printed) before the verdict is combined.
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("https://arxiv.org/abs/2305.18290" in output, "has arxiv URL"),
-        # A section heading is expected at the very top
-        assert_true(output.startswith("# "), "starts with heading"),
-        assert_true(size > 500, f"section has substantial content ({size} chars)"),
-        # A single section must not carry the TOC structure
-        assert_true("## Sections" not in output, "is a single section (not TOC)"),
-    ]
-    return all(checks)
-async def test_read_paper_section_by_name():
-    """Read the "Experiments" section of the DPO paper by name.
-    Returns True when every assertion passed.
-    """
-    print_test("read_paper section='Experiments' for 2305.18290")
-    request = {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Experiments"}
-    output, success = await run(request)
-    print_output(output, max_lines=30)
-    # The heading is taken to be the first line of the output
-    heading = output.split("\n")[0]
-    size = len(output)
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true(
-            "experiment" in heading.lower(),
-            f"heading contains 'Experiments': '{heading}'",
-        ),
-        assert_true(size > 500, f"section has substantial content ({size} chars)"),
-    ]
-    return all(checks)
-async def test_read_paper_old_paper():
-    """Read a 2017 paper and accept either full sections or an abstract fallback.
-    Returns True when every assertion passed.
-    """
-    print_test("read_paper for 1706.03762 (Attention Is All You Need — 2017 paper)")
-    output, success = await run({"operation": "read_paper", "arxiv_id": "1706.03762"})
-    print_output(output, max_lines=30)
-    # Either we get sections (HTML available) or abstract fallback
-    has_sections = "## Sections" in output
-    has_abstract_fallback = "HTML version not available" in output
-    acceptable = has_sections or has_abstract_fallback or "## Abstract" in output
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("attention" in output.lower(), "mentions 'attention' (relevant content)"),
-        assert_true(acceptable, "got either full sections, or abstract fallback"),
-    ]
-    if has_sections:
-        print_success("HTML version available — got full sections")
-    elif has_abstract_fallback:
-        print_success("HTML not available — graceful fallback to abstract")
-    return all(checks)
-# ---------------------------------------------------------------------------
-# Test Suite 3: Linked Resources
-# ---------------------------------------------------------------------------
-async def test_find_datasets():
-    """List datasets linked to the DPO paper using the default sort order.
-    Returns True when every assertion passed.
-    """
-    print_test("find_datasets for 2305.18290 (limit=5, sort=downloads)")
-    request = {"operation": "find_datasets", "arxiv_id": "2305.18290", "limit": 5}
-    output, success = await run(request)
-    print_output(output)
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("Datasets linked to paper 2305.18290" in output, "has correct heading"),
-        assert_true("sorted by downloads" in output, "sorted by downloads (default)"),
-    ]
-    # Dataset entries are markdown links into huggingface.co/datasets/
-    dataset_ids = re.findall(r"\[([^\]]+)\]\(https://huggingface\.co/datasets/", output)
-    checks.append(assert_true(len(dataset_ids) > 0, f"found {len(dataset_ids)} dataset links"))
-    if dataset_ids:
-        print_success(f"dataset IDs: {dataset_ids}")
-    # Download counts should be present
-    downloads = re.findall(r"Downloads: ([\d,]+)", output)
-    checks.append(assert_true(len(downloads) > 0, f"found download counts: {downloads}"))
-    # Hint pointing at the dataset inspection tool
-    checks.append(assert_true("hf_inspect_dataset" in output, "has inspect dataset hint"))
-    return all(checks)
-async def test_find_datasets_sort_likes():
-    """List datasets linked to the DPO paper sorted by likes.
-    Returns True when every assertion passed.
-    """
-    print_test("find_datasets for 2305.18290 (sort=likes, limit=3)")
-    request = {"operation": "find_datasets", "arxiv_id": "2305.18290", "limit": 3, "sort": "likes"}
-    output, success = await run(request)
-    print_output(output)
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("sorted by likes" in output, "sorted by likes"),
-    ]
-    return all(checks)
-async def test_find_models():
-    """Check that ``find_models`` lists models linked to the DPO paper.
-    Returns:
-        True when every assertion passed, False otherwise.
-    """
-    print_test("find_models for 2305.18290 (limit=5)")
-    output, success = await run(
-        {"operation": "find_models", "arxiv_id": "2305.18290", "limit": 5}
-    )
-    print_output(output)
-    ok = True
-    ok &= assert_true(success, "success=True")
-    ok &= assert_true("Models linked to paper 2305.18290" in output, "has correct heading")
-    # Check model links. The lookahead skips links into other hub areas
-    # (datasets, spaces, papers, collections) so that only model-style URLs
-    # are counted as model links.
-    model_ids = re.findall(
-        r"\[([^\]]+)\]\(https://huggingface\.co/(?!datasets/|spaces/|papers/|collections/)",
-        output,
-    )
-    ok &= assert_true(len(model_ids) > 0, f"found {len(model_ids)} model links")
-    if model_ids:
-        print_success(f"model IDs: {model_ids}")
-    # Check for pipeline_tag / library info
-    has_task = "Task:" in output
-    has_library = "Library:" in output
-    ok &= assert_true(has_task or has_library, "has Task or Library metadata")
-    return ok
-async def test_find_collections():
-    """List collections that contain the DPO paper and check their metadata.
-    Returns True when every assertion passed.
-    """
-    print_test("find_collections for 2305.18290")
-    request = {"operation": "find_collections", "arxiv_id": "2305.18290"}
-    output, success = await run(request)
-    print_output(output)
-    collection_urls = re.findall(r"https://huggingface\.co/collections/\S+", output)
-    # Every check is evaluated (and printed) before the verdict is combined.
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("Collections containing paper" in output, "has correct heading"),
-        assert_true(len(collection_urls) > 0, f"found {len(collection_urls)} collection URLs"),
-        # Metadata expected on the entries
-        assert_true("Upvotes:" in output, "has upvote counts"),
-        assert_true("Items:" in output, "has item counts"),
-    ]
-    return all(checks)
-async def test_find_all_resources():
-    """Fetch datasets, models, and collections for the DPO paper in one call.
-    Returns True when every assertion passed.
-    """
-    print_test("find_all_resources for 2305.18290 (parallel fan-out)")
-    request = {"operation": "find_all_resources", "arxiv_id": "2305.18290"}
-    output, success = await run(request)
-    print_output(output)
-    # Every check is evaluated (and printed) before the verdict is combined.
-    checks = [
-        assert_true(success, "success=True"),
-        assert_true("# Resources linked to paper 2305.18290" in output, "has unified heading"),
-        assert_true("https://huggingface.co/papers/2305.18290" in output, "has paper URL"),
-        # All three sections should be present
-        assert_true("## Datasets" in output, "has Datasets section"),
-        assert_true("## Models" in output, "has Models section"),
-        assert_true("## Collections" in output, "has Collections section"),
-        # Sections should hold real entries, not just "None found"
-        assert_true("downloads)" in output, "datasets/models have download counts"),
-    ]
-    return all(checks)
-# ---------------------------------------------------------------------------
-# Test Suite 4: Edge Cases
-# ---------------------------------------------------------------------------
-async def test_search_no_results():
-    """Search for a nonsense term and expect an empty, non-error result.
-    Returns True when every assertion passed.
-    """
-    print_test("search with gibberish query → should return empty gracefully")
-    request = {"operation": "search", "query": "xyzzyplugh_nonexistent_topic_9999"}
-    output, success = await run(request)
-    print_output(output)
-    checks = [
-        assert_true(success, "success=True (empty results is not an error)"),
-        assert_true("No papers found" in output, "says 'No papers found'"),
-    ]
-    return all(checks)
-async def test_missing_query():
-    """Call ``search`` without a query and expect a "required" error.
-    Returns True when every assertion passed.
-    """
-    print_test("search without query → should error")
-    output, success = await run({"operation": "search"})
-    print_output(output)
-    checks = [
-        assert_true(not success, "success=False (missing required param)"),
-        assert_true("required" in output.lower(), "error mentions 'required'"),
-    ]
-    return all(checks)
-async def test_missing_arxiv_id():
-    """Call ``find_datasets`` without an arxiv_id and expect a "required" error.
-    Returns True when every assertion passed.
-    """
-    print_test("find_datasets without arxiv_id → should error")
-    output, success = await run({"operation": "find_datasets"})
-    print_output(output)
-    checks = [
-        assert_true(not success, "success=False"),
-        assert_true("required" in output.lower(), "error mentions 'required'"),
-    ]
-    return all(checks)
-async def test_invalid_arxiv_id():
-    """Request details for a nonexistent arXiv ID and expect a failure.
-    Returns True when the assertion passed.
-    """
-    print_test("paper_details with nonexistent arxiv ID")
-    request = {"operation": "paper_details", "arxiv_id": "0000.00000"}
-    output, success = await run(request)
-    print_output(output)
-    failed_as_expected = assert_true(not success, "success=False (API returns error)")
-    return failed_as_expected
-async def test_invalid_operation():
-    """Call an unknown operation and expect an error listing the valid ones.
-    Returns True when every assertion passed.
-    """
-    print_test("invalid operation name → should error")
-    output, success = await run({"operation": "nonexistent_op"})
-    print_output(output)
-    checks = [
-        assert_true(not success, "success=False"),
-        assert_true("Unknown operation" in output, "says 'Unknown operation'"),
-        assert_true("trending" in output, "lists valid operations"),
-    ]
-    return all(checks)
-async def test_read_paper_bad_section():
-    """Ask for a nonexistent section and expect an error listing real sections.
-    Returns True when every assertion passed.
-    """
-    print_test("read_paper with nonexistent section → should error with available sections")
-    request = {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Nonexistent Section XYZ"}
-    output, success = await run(request)
-    print_output(output)
-    checks = [
-        assert_true(not success, "success=False"),
-        assert_true("not found" in output.lower(), "says section 'not found'"),
-        assert_true("Introduction" in output, "lists available sections (includes Introduction)"),
-    ]
-    return all(checks)
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-async def main():
-    """Run the four test suites in order and print a per-suite summary.
-    A test that raises is reported as crashed and counted as failed. Exits
-    with status 1 when any test or any individual assertion failed.
-    """
-    import traceback
-    heavy_rule = "=" * 70
-    print(heavy_rule)
-    print(f"{BLUE}HF Papers Tool — Integration Tests{RESET}")
-    print(f"{BLUE}All APIs are public, no authentication required.{RESET}")
-    print(heavy_rule)
-    all_tests = [
-        # Suite 1: Paper Discovery
-        ("Paper Discovery", [
-            test_trending,
-            test_trending_with_query,
-            test_search,
-            test_paper_details,
-        ]),
-        # Suite 2: Read Paper
-        ("Read Paper", [
-            test_read_paper_toc,
-            test_read_paper_section_by_number,
-            test_read_paper_section_by_name,
-            test_read_paper_old_paper,
-        ]),
-        # Suite 3: Linked Resources
-        ("Linked Resources", [
-            test_find_datasets,
-            test_find_datasets_sort_likes,
-            test_find_models,
-            test_find_collections,
-            test_find_all_resources,
-        ]),
-        # Suite 4: Edge Cases
-        ("Edge Cases", [
-            test_search_no_results,
-            test_missing_query,
-            test_missing_arxiv_id,
-            test_invalid_arxiv_id,
-            test_invalid_operation,
-            test_read_paper_bad_section,
-        ]),
-    ]
-    suite_results = []
-    for suite_name, tests in all_tests:
-        print(f"\n{YELLOW}{heavy_rule}{RESET}")
-        print(f"{YELLOW}Test Suite: {suite_name} ({len(tests)} tests){RESET}")
-        print(f"{YELLOW}{heavy_rule}{RESET}")
-        passed = 0
-        failed = 0
-        for test_fn in tests:
-            try:
-                succeeded = await test_fn()
-            except Exception as e:
-                # A crash is reported and counted as a failed test.
-                print_error(f"CRASHED: {e}")
-                traceback.print_exc()
-                succeeded = False
-            if succeeded:
-                passed += 1
-            else:
-                failed += 1
-        suite_results.append((suite_name, passed, failed))
-    # Summary
-    print(f"\n{heavy_rule}")
-    print(f"{BLUE}Summary{RESET}")
-    print(heavy_rule)
-    for suite_name, sp, sf in suite_results:
-        icon = f"{RED}✗{RESET}" if sf else f"{GREEN}✓{RESET}"
-        print(f"  {icon} {suite_name}: {sp}/{sp + sf} tests passed")
-    print("─" * 70)
-    total_failed = sum(sf for _, _, sf in suite_results)
-    total_tests = sum(sp + sf for _, sp, sf in suite_results)
-    print(f"  Assertions: {assertions_passed} passed, {assertions_failed} failed")
-    print(f"  Tests:      {total_tests - total_failed}/{total_tests} passed")
-    print(f"{heavy_rule}\n")
-    if total_failed or assertions_failed:
-        sys.exit(1)
-# Script entry point: run the async suite driver when executed directly.
-if __name__ == "__main__":
-    asyncio.run(main())

tests/unit/__init__.py DELETED Viewed

File without changes

tests/unit/tools/__init__.py DELETED Viewed

File without changes

tests/unit/tools/test_jobs_tool.py DELETED Viewed

@@ -1,537 +0,0 @@
-"""
-Tests for HF Jobs Tool
-Tests the refactored jobs tool implementation using huggingface-hub library
-"""
-from unittest.mock import AsyncMock, patch
-import pytest
-from agent.tools.jobs_tool import HfJobsTool, hf_jobs_handler
-def create_mock_job_info(
-    job_id="test-job-1",
-    stage="RUNNING",
-    command=None,
-    docker_image="python:3.12",
-):
-    """Build a ``JobInfo`` instance filled with fixed test values.
-
-    Args:
-        job_id: Stored as the job id and appended to the job URL.
-        stage: Stored under the ``"stage"`` key of the status mapping.
-        command: Command list for the job; ``None`` selects ``["echo", "test"]``.
-        docker_image: Stored as the job's docker image.
-
-    Returns:
-        The constructed ``JobInfo`` object.
-    """
-    from huggingface_hub._jobs_api import JobInfo
-    job_fields = {
-        "id": job_id,
-        "created_at": "2024-01-01T00:00:00.000000Z",
-        "docker_image": docker_image,
-        "space_id": None,
-        # A fresh default list is created per call, never shared between jobs.
-        "command": ["echo", "test"] if command is None else command,
-        "arguments": [],
-        "environment": {},
-        "secrets": {},
-        "flavor": "cpu-basic",
-        "status": {"stage": stage, "message": None},
-        "owner": {"id": "123", "name": "test-user", "type": "user"},
-        "endpoint": "https://huggingface.co",
-        "url": f"https://huggingface.co/jobs/test-user/{job_id}",
-    }
-    return JobInfo(**job_fields)
-def create_mock_scheduled_job_info(
-    job_id="sched-job-1",
-    schedule="@daily",
-    suspend=False,
-):
-    """Build a ``ScheduledJobInfo`` instance filled with fixed test values.
-
-    Args:
-        job_id: Stored as the scheduled job id.
-        schedule: Stored as the job's schedule expression.
-        suspend: Stored as the job's ``suspend`` flag.
-
-    Returns:
-        The constructed ``ScheduledJobInfo`` object.
-    """
-    from huggingface_hub._jobs_api import ScheduledJobInfo
-    # Specification of the job that the schedule would launch.
-    spec = {
-        "docker_image": "python:3.12",
-        "space_id": None,
-        "command": ["python", "backup.py"],
-        "arguments": [],
-        "environment": {},
-        "secrets": {},
-        "flavor": "cpu-basic",
-        "timeout": 1800,
-        "tags": None,
-        "arch": None,
-    }
-    run_status = {
-        "last_job": None,
-        "next_job_run_at": "2024-01-02T00:00:00.000000Z",
-    }
-    account = {"id": "123", "name": "test-user", "type": "user"}
-    return ScheduledJobInfo(
-        id=job_id,
-        created_at="2024-01-01T00:00:00.000000Z",
-        job_spec=spec,
-        schedule=schedule,
-        suspend=suspend,
-        concurrency=False,
-        status=run_status,
-        owner=account,
-    )
-@pytest.mark.asyncio
-async def test_show_help():
-    """An empty request returns the general help text and is not an error."""
-    response = await HfJobsTool().execute({})
-    help_text = response["formatted"]
-    for expected_fragment in ("HuggingFace Jobs API", "Available Commands"):
-        assert expected_fragment in help_text
-    assert response["totalResults"] == 1
-    assert not response.get("isError", False)
-@pytest.mark.asyncio
-async def test_show_operation_help():
-    """Passing ``help`` in the args of an operation returns that operation's help."""
-    help_request = {"operation": "run", "args": {"help": True}}
-    response = await HfJobsTool().execute(help_request)
-    assert "Help for operation" in response["formatted"]
-    assert response["totalResults"] == 1
-@pytest.mark.asyncio
-async def test_invalid_operation():
-    """An unrecognised operation name must produce an error result."""
-    tool = HfJobsTool()
-    result = await tool.execute({"operation": "invalid_op"})
-    # Identity check rather than ``== True``: the flag has to be the boolean
-    # ``True``, not merely a value that compares equal to it (such as ``1``).
-    assert result.get("isError") is True
-    assert "Unknown operation" in result["formatted"]
-@pytest.mark.asyncio
-async def test_run_job_missing_command():
-    """A ``run`` request without a command is reported as an error result."""
-    tool = HfJobsTool()
-    # ``run_job`` is replaced with a stub that raises when it is called.
-    failure = Exception("command parameter is required")
-    with patch.object(tool.api, "run_job", side_effect=failure):
-        result = await tool.execute(
-            {"operation": "run", "args": {"image": "python:3.12"}}
-        )
-        # Identity check rather than ``== True`` so only the boolean passes.
-        assert result.get("isError") is True
-@pytest.mark.asyncio
-async def test_list_jobs_mock():
-    """``ps`` shows only the running job by default and both jobs with ``all``."""
-    tool = HfJobsTool()
-    fake_jobs = [
-        create_mock_job_info("test-job-1", "RUNNING"),
-        create_mock_job_info("test-job-2", "COMPLETED", ["python", "script.py"]),
-    ]
-    with patch.object(tool.api, "list_jobs", return_value=fake_jobs):
-        # Default listing: the COMPLETED job is expected to be left out.
-        default_view = await tool.execute({"operation": "ps"})
-        assert not default_view.get("isError", False)
-        assert "test-job-1" in default_view["formatted"]
-        assert "test-job-2" not in default_view["formatted"]
-        assert default_view["totalResults"] == 1
-        assert default_view["resultsShared"] == 1
-        # Listing with all=True: both jobs are expected to appear.
-        full_view = await tool.execute({"operation": "ps", "args": {"all": True}})
-        assert not full_view.get("isError", False)
-        assert "test-job-1" in full_view["formatted"]
-        assert "test-job-2" in full_view["formatted"]
-        assert full_view["totalResults"] == 2
-        assert full_view["resultsShared"] == 2
-@pytest.mark.asyncio
-async def test_inspect_job_mock():
-    """``inspect`` renders the details of the job returned by the API stub."""
-    tool = HfJobsTool()
-    stub_job = create_mock_job_info("test-job-1", "RUNNING")
-    request = {"operation": "inspect", "args": {"job_id": "test-job-1"}}
-    with patch.object(tool.api, "inspect_job", return_value=stub_job) as mock_inspect:
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "test-job-1" in rendered
-        assert "Job Details" in rendered
-        mock_inspect.assert_called_once()
-@pytest.mark.asyncio
-async def test_cancel_job_mock():
-    """``cancel`` reports the cancelled job id and calls the API stub once."""
-    tool = HfJobsTool()
-    request = {"operation": "cancel", "args": {"job_id": "test-job-1"}}
-    with patch.object(tool.api, "cancel_job", return_value=None) as mock_cancel:
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "cancelled" in rendered
-        assert "test-job-1" in rendered
-        mock_cancel.assert_called_once()
-@pytest.mark.asyncio
-async def test_run_job_mock():
-    """``run`` reports the started job returned by the API stub."""
-    tool = HfJobsTool()
-    started_job = create_mock_job_info("new-job-123", "RUNNING")
-    run_args = {
-        "image": "python:3.12",
-        "command": ["python", "-c", "print('test')"],
-        "flavor": "cpu-basic",
-        "detach": True,
-    }
-    with patch.object(tool.api, "run_job", return_value=started_job) as mock_run:
-        response = await tool.execute({"operation": "run", "args": run_args})
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "new-job-123" in rendered
-        assert "Job started" in rendered
-        mock_run.assert_called_once()
-@pytest.mark.asyncio
-async def test_run_uv_job_mock():
-    """``uv`` reports the started UV job returned by the API stub."""
-    tool = HfJobsTool()
-    started_job = create_mock_job_info("uv-job-456", "RUNNING")
-    uv_args = {
-        "script": "print('Hello UV')",
-        "flavor": "cpu-basic",
-    }
-    with patch.object(tool.api, "run_uv_job", return_value=started_job) as mock_run:
-        response = await tool.execute({"operation": "uv", "args": uv_args})
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "uv-job-456" in rendered
-        assert "UV Job started" in rendered
-        mock_run.assert_called_once()
-@pytest.mark.asyncio
-async def test_get_logs_mock():
-    """``logs`` includes the lines produced by the stubbed log fetcher."""
-    tool = HfJobsTool()
-    canned_lines = ("Log line 1", "Log line 2", "Hello from HF Jobs!")
-    with patch.object(tool.api, "fetch_job_logs") as mock_logs:
-        # A generator object is returned, so the lines can be consumed only once.
-        mock_logs.return_value = (line for line in canned_lines)
-        response = await tool.execute(
-            {"operation": "logs", "args": {"job_id": "test-job-1"}}
-        )
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "Log line 1" in rendered
-        assert "Hello from HF Jobs!" in rendered
-@pytest.mark.asyncio
-async def test_handler():
-    """``hf_jobs_handler`` returns the tool's formatted text with a success flag."""
-    canned_result = {
-        "formatted": "Test output",
-        "totalResults": 1,
-        "resultsShared": 1,
-        "isError": False,
-    }
-    with patch("agent.tools.jobs_tool.HfJobsTool") as MockTool:
-        # Whatever instance the handler constructs gets this awaitable stub.
-        MockTool.return_value.execute = AsyncMock(return_value=canned_result)
-        output, success = await hf_jobs_handler({"operation": "ps"})
-        # Identity check rather than ``== True`` so only the boolean passes.
-        assert success is True
-        assert "Test output" in output
-@pytest.mark.asyncio
-async def test_handler_error():
-    """``hf_jobs_handler`` reports failure when building the tool raises."""
-    # The patched class raises as soon as it is instantiated.
-    boom = Exception("Test error")
-    with patch("agent.tools.jobs_tool.HfJobsTool", side_effect=boom):
-        output, success = await hf_jobs_handler({})
-        # Identity check rather than ``== False``: values such as ``0`` that
-        # merely compare equal to ``False`` must not satisfy the test.
-        assert success is False
-        assert "Error" in output
-@pytest.mark.asyncio
-async def test_scheduled_jobs_mock():
-    """``scheduled ps`` lists the scheduled job returned by the API stub."""
-    tool = HfJobsTool()
-    listed = [create_mock_scheduled_job_info()]
-    with patch.object(tool.api, "list_scheduled_jobs", return_value=listed):
-        response = await tool.execute({"operation": "scheduled ps"})
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "sched-job-1" in rendered
-        assert "Scheduled Jobs" in rendered
-@pytest.mark.asyncio
-async def test_create_scheduled_job_mock():
-    """``scheduled run`` reports the scheduled job created by the API stub."""
-    tool = HfJobsTool()
-    created = create_mock_scheduled_job_info()
-    schedule_args = {
-        "image": "python:3.12",
-        "command": ["python", "backup.py"],
-        "schedule": "@daily",
-        "flavor": "cpu-basic",
-    }
-    with patch.object(
-        tool.api, "create_scheduled_job", return_value=created
-    ) as mock_create:
-        response = await tool.execute(
-            {"operation": "scheduled run", "args": schedule_args}
-        )
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "sched-job-1" in rendered
-        assert "Scheduled job created" in rendered
-        mock_create.assert_called_once()
-@pytest.mark.asyncio
-async def test_inspect_scheduled_job_mock():
-    """``scheduled inspect`` renders the scheduled job returned by the API stub."""
-    tool = HfJobsTool()
-    stub_job = create_mock_scheduled_job_info()
-    request = {
-        "operation": "scheduled inspect",
-        "args": {"scheduled_job_id": "sched-job-1"},
-    }
-    with patch.object(tool.api, "inspect_scheduled_job", return_value=stub_job):
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "sched-job-1" in rendered
-        assert "Scheduled Job Details" in rendered
-@pytest.mark.asyncio
-async def test_suspend_scheduled_job_mock():
-    """``scheduled suspend`` reports the suspended scheduled job id."""
-    tool = HfJobsTool()
-    request = {
-        "operation": "scheduled suspend",
-        "args": {"scheduled_job_id": "sched-job-1"},
-    }
-    with patch.object(tool.api, "suspend_scheduled_job", return_value=None):
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "suspended" in rendered
-        assert "sched-job-1" in rendered
-@pytest.mark.asyncio
-async def test_resume_scheduled_job_mock():
-    """``scheduled resume`` reports the resumed scheduled job id."""
-    tool = HfJobsTool()
-    request = {
-        "operation": "scheduled resume",
-        "args": {"scheduled_job_id": "sched-job-1"},
-    }
-    with patch.object(tool.api, "resume_scheduled_job", return_value=None):
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "resumed" in rendered
-        assert "sched-job-1" in rendered
-@pytest.mark.asyncio
-async def test_delete_scheduled_job_mock():
-    """``scheduled delete`` reports the deleted scheduled job id."""
-    tool = HfJobsTool()
-    request = {
-        "operation": "scheduled delete",
-        "args": {"scheduled_job_id": "sched-job-1"},
-    }
-    with patch.object(tool.api, "delete_scheduled_job", return_value=None):
-        response = await tool.execute(request)
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "deleted" in rendered
-        assert "sched-job-1" in rendered
-@pytest.mark.asyncio
-async def test_list_jobs_with_status_filter():
-    """``ps`` with a ``status`` argument shows only the job in that stage."""
-    tool = HfJobsTool()
-    stage_by_job = (("job-1", "RUNNING"), ("job-2", "COMPLETED"), ("job-3", "ERROR"))
-    fake_jobs = [create_mock_job_info(name, stage) for name, stage in stage_by_job]
-    with patch.object(tool.api, "list_jobs", return_value=fake_jobs):
-        # Ask for every job, then narrow the listing to the ERROR stage.
-        response = await tool.execute(
-            {"operation": "ps", "args": {"all": True, "status": "ERROR"}}
-        )
-        assert not response.get("isError", False)
-        rendered = response["formatted"]
-        assert "job-3" in rendered
-        assert "job-1" not in rendered
-        assert response["resultsShared"] == 1
-def test_filter_uv_install_output():
-    """Check what ``_filter_uv_install_output`` keeps from UV job logs.
-
-    Expected behaviour pinned here: a line preceding the
-    ``Installed N package(s) in <time>`` line is replaced by a single
-    ``[installs truncated]`` marker, while logs without such a line, logs that
-    start with it, and empty logs come back unchanged.
-    """
-    from agent.tools.jobs_tool import _filter_uv_install_output
-    marker = "[installs truncated]"
-    # Each input has exactly one line ahead of its "Installed ..." line, so the
-    # expected output is the marker followed by the remaining lines.
-    collapsed_inputs = [
-        # UV resolution and installation output
-        [
-            "Resolved 68 packages in 1.01s",
-            "Installed 68 packages in 251ms",
-            "Hello from the script!",
-            "Script execution completed",
-        ],
-        # Time given in seconds rather than milliseconds
-        [
-            "Downloading packages...",
-            "Installed 10 packages in 2s",
-            "Running main.py",
-        ],
-        # Singular "package"
-        [
-            "Resolving dependencies",
-            "Installed 1 package in 50ms",
-            "Import successful",
-        ],
-        # Decimal time value
-        [
-            "Starting installation",
-            "Installed 25 packages in 125.5ms",
-            "All dependencies ready",
-        ],
-    ]
-    for raw_logs in collapsed_inputs:
-        wanted = [marker, *raw_logs[1:]]
-        filtered = _filter_uv_install_output(raw_logs)
-        assert len(filtered) == len(wanted)
-        for position, line in enumerate(wanted):
-            assert filtered[position] == line
-    untouched_inputs = [
-        # No UV installation output at all
-        [
-            "Script started",
-            "Processing data...",
-            "Done!",
-        ],
-        # "Installed" is already the first line, so no marker is expected
-        [
-            "Installed 5 packages in 100ms",
-            "Running script...",
-        ],
-    ]
-    for raw_logs in untouched_inputs:
-        filtered = _filter_uv_install_output(raw_logs)
-        assert filtered == raw_logs
-    # Empty logs stay empty
-    assert _filter_uv_install_output([]) == []