# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Code Review Environment Implementation.

Supports three grader difficulty levels: "easy", "medium", "hard".
Pass `grader_level` to the constructor to select the desired tier.
"""

from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import (
        CodeReviewAction,
        CodeReviewObservation,
        CodeReviewReward,
        CodeReviewPullRequest,
        CodeReviewStepResponse,
    )
except ImportError:
    from models import (
        CodeReviewAction,
        CodeReviewObservation,
        CodeReviewReward,
        CodeReviewPullRequest,
        CodeReviewStepResponse,
    )

import json
from pathlib import Path

try:
    from .graders import get_grader
except ImportError:
    from graders import get_grader

# Location of the task dataset: <directory two levels above this file>/dataset/dataset.json
dataset_path = Path(__file__).parent.parent.joinpath("dataset", "dataset.json")


class CodeReviewEnvironment(Environment):
    """
    Code Review environment with per-task grading difficulty.

    Every ``reset()`` advances to the next sample of the dataset loaded from
    ``dataset_path``. The grader tier for the episode is the sample's
    ``task_type`` when that is one of "easy", "medium" or "hard"; otherwise
    the ``grader_level`` given to the constructor is used.

    An episode ends when the agent submits a ``final_decision`` action, when
    ``max_steps`` actions have been taken, or when an action cannot be parsed.

    Args:
        grader_level: Fallback grading difficulty — one of "easy", "medium",
                      "hard". Used for samples whose ``task_type`` does not
                      name a tier. Defaults to "medium".

    Example:
        >>> env = CodeReviewEnvironment(grader_level="hard")
        >>> obs = env.reset()
        >>> obs = env.step(CodeReviewAction(action_type="final_decision", decision="approve"))
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Grader tiers this environment selects between.
    GRADER_LEVELS = ("easy", "medium", "hard")

    def __init__(self, grader_level: str = "medium"):
        """
        Load the dataset and start the first episode.

        Args:
            grader_level: Fallback grader tier for samples without a usable
                ``task_type``. An unrecognised value falls back to "medium".

        Raises:
            ValueError: If the dataset file contains no samples.
        """
        if grader_level not in self.GRADER_LEVELS:
            # Previously this argument was ignored entirely, so stay lenient:
            # warn and fall back rather than reject the caller.
            print(f"Unknown grader_level {grader_level!r}; falling back to 'medium'")
            grader_level = "medium"
        self._default_grader_level = grader_level

        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._reset_count = 0
        self.max_steps = 5
        self.task_index = 0

        with open(dataset_path, encoding="utf-8") as f:
            self.dataset = json.load(f)

        if not self.dataset:
            # reset() indexes with ``task_index % len(dataset)``; fail with a
            # clear message instead of a ZeroDivisionError.
            raise ValueError(f"Dataset at {dataset_path} is empty")

        # NOTE: reset() increments task_index before selecting a sample, so
        # this call serves sample index 1 and the caller's first reset()
        # moves on to index 2 (both modulo the dataset size).
        self.reset()

    def reset(self) -> CodeReviewObservation:
        """
        Start a new episode on the next dataset sample.

        Creates a fresh ``State`` with a new episode id, selects the sample at
        ``task_index % len(dataset)``, picks the grader for it, and clears all
        per-episode bookkeeping.

        Returns:
            The initial observation: the pull request, no previous comments,
            ``step_count`` 0, ``reward`` 0.0 and ``done`` False.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._reset_count += 1
        self.task_index += 1

        self.sample = self.dataset[self.task_index % len(self.dataset)]

        self.pr = CodeReviewPullRequest(**self.sample["pr"])
        self.gt = self.sample["ground_truth"]
        self.task_type = self.sample.get("task_type", "unknown")

        # The sample's own tier wins; the constructor argument is the fallback.
        if self.task_type in self.GRADER_LEVELS:
            self.grader_level = self.task_type
        else:
            self.grader_level = self._default_grader_level
        self.grader = get_grader(self.grader_level)

        self.history = []
        self.step_count = 0
        self.done = False
        self.issues_identified = []
        self.fix_attempted = False

        return CodeReviewObservation(
            pr=self.pr,
            # A fresh list, not ``self.history``: the history later holds
            # action objects, which must not leak into this observation.
            previous_comments=[],
            step_count=self.step_count,
            max_steps=self.max_steps,
            reward=0.0,
            done=False,
        )

    def step(self, action: CodeReviewAction) -> CodeReviewStepResponse:  # type: ignore[override]
        """
        Execute one step: grade the action and return an observation + reward.

        Args:
            action: A ``CodeReviewAction``, a dict of its fields, or a
                list/tuple ``(action_type, comment, suggested_code, decision)``
                whose trailing items may be omitted.

        Returns:
            A step response holding the new observation, the numeric reward,
            the ``done`` flag and an ``info`` dict. An action that cannot be
            converted yields the response built by ``_invalid_step``.
        """
        # Counts every attempt, including ones that fail validation below.
        self._state.step_count += 1

        try:
            action = self._coerce_action(action)
        except Exception as e:
            # Boundary with untrusted agent input: any construction failure is
            # reported as an invalid action instead of crashing the server.
            print(f"Error processing action: {e}")
            return self._invalid_step()

        # ------------------------------------------------------------------
        # Update state
        # ------------------------------------------------------------------
        self.step_count += 1
        self.history.append(action)

        if action.action_type == "comment" and action.comment:
            self.issues_identified.append(action.comment)

        if action.action_type == "suggest_fix":
            self.fix_attempted = True

        # ------------------------------------------------------------------
        # Score via the active grader
        # ------------------------------------------------------------------
        score = self.grader.grade_action(action, self.gt)
        bonus = self.grader.compute_step_bonus(action, self.step_count, self.history)

        # Per-step scores are kept inside [0.01, 0.99].
        score = max(0.01, min(score + bonus, 0.99))

        done = (
            action.action_type == "final_decision"
            or self.step_count >= self.max_steps
        )
        self.done = done

        if done:
            # On the terminal step the grader's episode-level score replaces
            # the per-step score (and is not clamped here).
            score = self.grader.compute_done_score(self.history, self.gt)

        # ------------------------------------------------------------------
        # Build response
        # ------------------------------------------------------------------
        obs = self._build_observation()

        rew = CodeReviewReward(score=score, feedback="graded")
        print(f"[{self.grader_level.upper()}] Step {self.step_count} — Score: {rew.score:.4f}")

        return CodeReviewStepResponse(
            observation=obs,
            reward=rew.score,
            done=done,
            info={
                "grader_level": self.grader_level,
                "task_type": self.task_type,
                "issues_identified": len(self.issues_identified),
                "fix_attempted": self.fix_attempted,
            },
        )

    @property
    def state(self) -> State:
        """Return the ``State`` object (episode id and step count) of the current episode."""
        return self._state

    @staticmethod
    def _coerce_action(action) -> CodeReviewAction:
        """Convert a dict, list/tuple or ``CodeReviewAction`` into a ``CodeReviewAction``.

        Raises:
            ValueError: If ``action`` is none of the supported types. Errors
                raised while constructing the action propagate unchanged.
        """
        if isinstance(action, dict):
            return CodeReviewAction(**action)
        if isinstance(action, (list, tuple)):
            # Positional form; missing trailing fields default to None.
            return CodeReviewAction(
                action_type=action[0],
                comment=action[1] if len(action) > 1 else None,
                suggested_code=action[2] if len(action) > 2 else None,
                decision=action[3] if len(action) > 3 else None,
            )
        if isinstance(action, CodeReviewAction):
            return action
        raise ValueError(f"Unsupported action type: {type(action)}")

    def _build_observation(self) -> CodeReviewObservation:
        """Build an observation of the current episode with all non-empty comments so far."""
        return CodeReviewObservation(
            pr=self.pr,
            previous_comments=[a.comment for a in self.history if a.comment],
            step_count=self.step_count,
            max_steps=self.max_steps,
        )

    def _invalid_step(self) -> CodeReviewStepResponse:
        """Return the terminal response for an unparseable action: reward 0.0, ``done`` True."""
        rew = CodeReviewReward(score=0.0, feedback="invalid action")
        self.done = True
        return CodeReviewStepResponse(
            observation=self._build_observation(),
            # Pass the numeric score, matching what ``step`` returns.
            reward=rew.score,
            done=True,
            info={"error": "invalid_action"},
        )