"""
GitHub Find Examples Tool

Finds examples, guides, and tutorials for a library using deterministic queries and heuristics.
"""

import asyncio
import logging
import math
import os
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

try:
    import requests
except ImportError:
    raise ImportError(
        "requests library is required. Install with: pip install requests"
    )

from agent.tools.types import ToolResult


@dataclass
class Example:
    """A single ranked search hit plus the repository metadata used to score it."""

    # Repository full name as reported by the search hit.
    repo: str
    # File path of the hit.
    path: str
    # Blob SHA reported by the search hit.
    ref: str
    # HTML URL of the file.
    url: str
    # Accumulated heuristic relevance score (higher ranks first).
    score: float
    # Comma-separated, human-readable explanation of the score.
    reason: str
    # Stargazer count of the repository (0 when metadata is unavailable).
    repo_stars: int
    # Repository "updated_at" timestamp string ("" when unavailable).
    repo_updated: str
    # Size value reported by the search hit.
    file_size: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields of this example as a plain dictionary."""
        return asdict(self)


class GitHubAPIError(Exception):
    """Error type for GitHub API problems, including a missing API token."""


# Path-based scoring weights.
# Maps a path fragment to the bonus a file earns when that fragment occurs in
# its path. The scorer lowercases both the fragment and the path before
# comparing, so "README.md" and "readme.md" are equivalent entries, and a file
# receives at most one path bonus. Matching is by substring, not by whole path
# segment.
PATH_SCORES = {
    "README.md": 100,
    "readme.md": 100,
    "docs/": 80,
    "doc/": 80,
    "examples/": 90,
    "example/": 90,
    "notebooks/": 70,
    "notebook/": 70,
    "tutorials/": 85,
    "tutorial/": 85,
    "guides/": 85,
    "guide/": 85,
    "tests/": 40,
    "test/": 40,
    "demos/": 75,
    "demo/": 75,
    "samples/": 75,
    "sample/": 75,
}

# Content-based scoring keywords.
# Maps a phrase to the bonus added when the phrase appears (case-insensitively)
# in the text-match fragments returned with a search hit. Unlike the path
# bonus, every phrase that matches contributes its points.
CONTENT_KEYWORDS = {
    'if __name__ == "__main__"': 50,
    "if __name__ == '__main__'": 50,
    "quickstart": 60,
    "quick start": 60,
    "getting started": 60,
    "tutorial": 50,
    "example usage": 55,
    "usage example": 55,
    "how to use": 45,
    "basic example": 50,
    "simple example": 50,
}

# File extension preferences.
# Maps a file suffix to a small bonus applied when the lowercased path ends
# with that suffix; at most one suffix bonus is applied per file.
PREFERRED_EXTENSIONS = {
    ".py": 10,
    ".ipynb": 15,
    ".md": 20,
    ".rst": 10,
    ".js": 10,
    ".ts": 10,
    ".go": 10,
    ".java": 10,
    ".cpp": 10,
    ".c": 10,
}


def _get_github_token() -> str:
    """Get GitHub token from environment."""
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        raise GitHubAPIError(
            "GITHUB_TOKEN environment variable is required. "
            "Set it with: export GITHUB_TOKEN=your_token_here"
        )
    return token


def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Execute a GitHub code search query."""
    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    results = []
    page = 1
    per_page = min(100, limit)

    try:
        while len(results) < limit:
            params = {"q": query, "per_page": per_page, "page": page}
            url = "https://api.github.com/search/code"
            response = requests.get(url, headers=headers, params=params, timeout=30)

            if response.status_code != 200:
                break

            data = response.json()
            items = data.get("items", [])

            if not items:
                break

            for item in items:
                results.append(
                    {
                        "repo": item.get("repository", {}).get("full_name", ""),
                        "path": item.get("path", ""),
                        "sha": item.get("sha", ""),
                        "url": item.get("html_url", ""),
                        "size": item.get("size", 0),
                        "text_matches": item.get("text_matches", []),
                    }
                )

            if len(results) >= limit or len(items) < per_page:
                break

            page += 1

    except Exception:
        pass

    return results[:limit]


def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
    """Fetch metadata for repositories."""
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    metadata = {}

    for repo in repos:
        try:
            url = f"https://api.github.com/repos/{repo}"
            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code == 200:
                data = response.json()
                metadata[repo] = {
                    "stars": data.get("stargazers_count", 0),
                    "updated_at": data.get("updated_at", ""),
                    "description": data.get("description", ""),
                }
        except:
            continue

    return metadata


def _score_and_rank(
    results: List[Dict[str, Any]], library: str, token: str
) -> List[Example]:
    """Score results based on heuristics and rank them."""
    repos = list(set(r["repo"] for r in results))
    repo_metadata = _fetch_repo_metadata(repos, token)

    scored_examples = []

    for result in results:
        repo = result["repo"]
        path = result["path"]

        score = 0.0
        reasons = []

        # Path-based scoring
        path_lower = path.lower()
        for pattern, points in PATH_SCORES.items():
            if pattern.lower() in path_lower:
                score += points
                reasons.append(f"in {pattern}")
                break

        # File extension scoring
        for ext, points in PREFERRED_EXTENSIONS.items():
            if path_lower.endswith(ext):
                score += points
                break

        # Content-based scoring
        text_content = ""
        for match in result.get("text_matches", []):
            text_content += match.get("fragment", "").lower() + " "

        for keyword, points in CONTENT_KEYWORDS.items():
            if keyword.lower() in text_content:
                score += points
                reasons.append(f"contains '{keyword}'")

        # Repo-based scoring
        metadata = repo_metadata.get(repo, {})
        stars = metadata.get("stars", 0)
        updated = metadata.get("updated_at", "")

        if stars > 0:
            star_score = math.log10(stars + 1) * 10
            score += star_score

        # Recency bonus
        if updated:
            try:
                updated_date = datetime.fromisoformat(updated.replace("Z", "+00:00"))
                if datetime.now(updated_date.tzinfo) - updated_date < timedelta(
                    days=180
                ):
                    score += 20
                    reasons.append("recently updated")
            except:
                pass

        # Filename quality
        filename = path.split("/")[-1].lower()
        if any(
            word in filename
            for word in ["example", "tutorial", "guide", "quickstart", "demo"]
        ):
            score += 30
            reasons.append("descriptive filename")

        # Size penalty
        if result["size"] > 100000:
            score *= 0.5
            reasons.append("large file")

        example = Example(
            repo=repo,
            path=path,
            ref=result["sha"],
            url=result["url"],
            score=score,
            reason=", ".join(reasons) if reasons else "matches library",
            repo_stars=stars,
            repo_updated=updated,
            file_size=result["size"],
        )

        scored_examples.append(example)

    scored_examples.sort(key=lambda x: x.score, reverse=True)
    return scored_examples


def _search_by_path(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Collect hits for ``library`` inside example, docs and tutorial directories.

    One query is issued per directory pattern, each limited to 20 hits. The
    query is scoped to ``org/repo_scope`` when ``repo_scope`` is given,
    otherwise to the whole organization.
    """
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    directories = (
        "examples/",
        "example/",
        "docs/",
        "tutorials/",
        "notebooks/",
        "guides/",
    )

    hits: List[Dict[str, Any]] = []
    for directory in directories:
        hits += _execute_search(
            f"{scope} {library} path:{directory}", token, limit=20
        )
    return hits


def _search_by_content(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Collect hits where ``library`` appears together with example-like phrases.

    One query is issued per phrase, each limited to 15 hits. The query is
    scoped to ``org/repo_scope`` when ``repo_scope`` is given, otherwise to the
    whole organization.
    """
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    phrases = ("if __name__", "quickstart", "tutorial", "usage example")

    hits: List[Dict[str, Any]] = []
    for phrase in phrases:
        hits += _execute_search(f"{scope} {library} {phrase}", token, limit=15)
    return hits


def _search_readmes(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Return up to 20 hits for ``library`` in files named README.

    The query is scoped to ``org/repo_scope`` when ``repo_scope`` is given,
    otherwise to the whole organization.
    """
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    return _execute_search(f"{scope} {library} filename:README", token, limit=20)


def find_examples(
    library: str,
    org: str = "huggingface",
    repo_scope: Optional[str] = None,
    max_results: int = 10,
) -> List[Example]:
    """
    Find examples, guides, and tutorials for a library using deterministic queries.

    Uses a playbook of smart searches and heuristics to find canonical examples:
    - Prefers README.md, docs/**, examples/**, notebooks/**, tests/**
    - Prefers files with if __name__ == "__main__", "quickstart", "tutorial"
    - Prefers repos with higher stars and more recent updates

    Args:
        library: Library name to search for (e.g., "transformers", "torch")
        org: GitHub organization to search in (default: "huggingface")
        repo_scope: Optional specific repository (e.g., "transformers")
        max_results: Maximum number of results to return (default: 10).
            A value of zero or less yields an empty list without contacting
            GitHub; ``None`` returns every ranked result.

    Returns:
        List of Example objects, ranked by relevance score

    Raises:
        GitHubAPIError: If no GitHub token is configured.
    """
    # A non-positive limit can never produce results; return before doing any
    # network work (a negative value would otherwise mis-slice the ranking).
    if max_results is not None and max_results <= 0:
        return []

    token = _get_github_token()

    # Deduplicate by (repo, path), keeping the first hit seen for each file.
    unique_results: Dict[Any, Dict[str, Any]] = {}
    for search in (_search_by_path, _search_by_content, _search_readmes):
        for result in search(library, org, repo_scope, token):
            unique_results.setdefault((result["repo"], result["path"]), result)

    scored_examples = _score_and_rank(list(unique_results.values()), library, token)
    return scored_examples[:max_results]


async def _async_call(func, *args, **kwargs):
    """Wrap synchronous calls for async context."""
    return await asyncio.to_thread(func, *args, **kwargs)


def _format_examples_table(examples: List[Example]) -> str:
    """Format examples as a markdown table."""
    if not examples:
        return "No examples found."

    lines = [
        "| Rank | File | Score | Stars | Reason |",
        "|------|------|-------|-------|--------|",
    ]

    for i, ex in enumerate(examples, 1):
        file_path = f"{ex.repo}/{ex.path}"
        if len(file_path) > 60:
            file_path = file_path[:57] + "..."
        reason = ex.reason if len(ex.reason) < 40 else ex.reason[:37] + "..."
        lines.append(
            f"| {i} | {file_path} | {ex.score:.1f} | {ex.repo_stars:,} | {reason} |"
        )

    return "\n".join(lines)


class FindExamplesTool:
    """Tool for finding examples and tutorials for libraries."""

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Validate ``params``, run the example search and format the outcome.

        Args:
            params: Tool arguments. ``library`` is required; ``org`` (default
                "huggingface"), ``repo_scope`` and ``max_results`` (default 10)
                are optional. A missing or null ``org``/``max_results`` falls
                back to its default.

        Returns:
            A dict with the keys ``formatted``, ``totalResults`` and
            ``resultsShared``; ``isError`` is set to ``True`` on failure.
            Errors are reported through the result rather than raised.
        """
        library = params.get("library")
        if not library:
            return {
                "formatted": "Error: 'library' parameter is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        # An explicit null must not turn into the literal query "org:None".
        org = params.get("org") or "huggingface"
        repo_scope = params.get("repo_scope")

        # Validate the limit up front so a bad value fails fast with a clear
        # message instead of surfacing as a slicing error after all searches.
        raw_max_results = params.get("max_results")
        try:
            max_results = 10 if raw_max_results is None else int(raw_max_results)
        except (TypeError, ValueError):
            max_results = 0
        if max_results < 1:
            return {
                "formatted": "Error: 'max_results' must be a positive integer",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        try:
            examples = await _async_call(
                find_examples,
                library=library,
                org=org,
                repo_scope=repo_scope,
                max_results=max_results,
            )

            if not examples:
                return {
                    "formatted": f"No examples found for '{library}' in {org}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            table = _format_examples_table(examples)
            parts = [
                f"**Found {len(examples)} examples for '{library}' in {org}:**\n\n{table}",
                # Add URLs and suggest using read_file
                "\n\n**Top examples (use read_file to view):**\n",
            ]
            for i, ex in enumerate(examples[:3], 1):
                parts.append(f"{i}. [{ex.repo}/{ex.path}]({ex.url})\n")
                parts.append(f"   Use: read_file(repo='{ex.repo}', path='{ex.path}')\n")

            return {
                "formatted": "".join(parts),
                "totalResults": len(examples),
                "resultsShared": len(examples),
            }

        except GitHubAPIError as e:
            return {
                "formatted": f"GitHub API Error: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }
        except Exception as e:
            # Tool boundary: report any unexpected failure to the caller.
            return {
                "formatted": f"Error: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }


# Tool specification.
# Declarative description of the find_examples tool: its name, a usage
# description, and a JSON-Schema-style definition of its arguments. Only
# "library" is required; the defaults mentioned in the descriptions mirror the
# ones applied by FindExamplesTool.execute.
# NOTE(review): presumably consumed by the agent's tool registry - confirm.
FIND_EXAMPLES_TOOL_SPEC = {
    "name": "find_examples",
    "description": (
        "Find examples, guides, and tutorials for a library using deterministic queries and heuristics.\n\n"
        "Uses best practices retrieval without semantic search:\n"
        "- Prefers README.md, docs/**, examples/**, notebooks/**, tests/**\n"
        "- Prefers files with if __name__ == '__main__', 'quickstart', 'tutorial', 'usage'\n"
        "- Prefers repos with higher stars and more recent updates\n\n"
        "Returns a ranked list of canonical example files.\n\n"
        "Examples:\n"
        "- Find transformers examples: {'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n"
        "- Find torch examples in specific repo: {'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples'}\n\n"
        "Use read_file tool to view the content of returned files.\n\n"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "library": {
                "type": "string",
                "description": "Library name to search for (e.g., 'transformers', 'torch', 'react')",
            },
            "org": {
                "type": "string",
                "description": "GitHub organization to search in (default: 'huggingface')",
            },
            "repo_scope": {
                "type": "string",
                "description": "Optional specific repository to search within",
            },
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to return (default: 10)",
            },
        },
        "required": ["library"],
    },
}


async def find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Run the find_examples tool for the agent tool router.

    Returns:
        A ``(text, success)`` pair: the formatted tool output and ``True``
        unless the tool flagged an error. Any exception is converted into an
        error message with ``success`` set to ``False``.
    """
    try:
        outcome = await FindExamplesTool().execute(arguments)
        text = outcome["formatted"]
        failed = outcome.get("isError", False)
        return text, not failed
    except Exception as exc:
        return f"Error executing find_examples: {exc}", False