import textwrap
import sys
from pathlib import Path

import pytest

# Directory one level above the folder that contains this test file.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Put that directory at the front of sys.path (only once), presumably so the
# project-local `measures` package imported below resolves regardless of the
# directory pytest is launched from.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from measures.VocabularyAnalyser import VocabularyAnalyser


@pytest.fixture
def glossary_file(tmp_path):
    """Write a five-row glossary CSV into ``tmp_path`` and return its path.

    Each row holds four comma-separated cells; unused cells are left empty.
    The path is returned as a plain string rather than a ``Path`` object.
    """
    rows = (
        "acute,,,",
        "acute angle, acute angles,,",
        "acute triangle, acute triangles,,",
        "add, added, adding, adds",
        "addend, addends,,",
    )
    # One row per line, with a trailing newline after the last row.
    contents = "\n".join(rows) + "\n"
    target = tmp_path / "glossary.csv"
    target.write_text(contents, encoding="utf-8")
    return str(target)


class DummyUtterance:
    """Minimal utterance stand-in used by the tests in this module.

    Stores the speaker and text it is given, and starts with
    ``vocabulary_terms`` and ``vocabulary_matches`` both set to ``None``.
    """

    def __init__(self, speaker, text):
        self.speaker, self.text = speaker, text
        # Result slots start empty; the tests read them after run_analysis.
        self.vocabulary_terms = self.vocabulary_matches = None


class DummyTranscript:
    """Minimal transcript stand-in: exposes the given ``utterances`` as-is."""

    def __init__(self, utterances):
        # Keep the caller's object itself (no copy is made).
        self.utterances = utterances


@pytest.fixture
def analyser(glossary_file):
    """Return a ``VocabularyAnalyser`` constructed from the temporary glossary path."""
    instance = VocabularyAnalyser(glossary_file)
    return instance


def test_match_counts_base_once(analyser):
    """Several cased and inflected forms of "add" yield the single entry "add"."""
    utterance = "Add add ADD adding added adds"
    found = analyser.match_one_utterance(utterance)
    assert found == ["add"]


def test_match_prefers_longest_phrase(analyser):
    """Only "acute angle" is reported, not the shorter term "acute" inside it."""
    sentence = "An acute angle appears in this proof."
    found = analyser.match_one_utterance(sentence)
    assert found == ["acute angle"]


def test_match_handles_overlapping_and_distinct_terms(analyser):
    """A standalone "acute" and the phrase "acute triangle" are both reported."""
    sentence = "The class studied the properties of an acute triangle, then discussed an acute situation."
    expected = ["acute", "acute triangle"]
    assert analyser.match_one_utterance(sentence) == expected


def test_run_analysis_adds_vocabulary_terms_and_matches(analyser):
    """Check what ``run_analysis`` returns and what it sets on every utterance.

    The call must hand back the same transcript object, and each utterance
    must end up with the expected ``vocabulary_terms`` list and
    ``vocabulary_matches`` mapping (term -> list of form/start/end records).
    """
    spoken = [
        ("Teacher", "We add addends in this acute triangle."),
        ("Student", "Acute angles contrast with obtuse ones."),
        ("Teacher", "No glossary matches"),
    ]
    transcript = DummyTranscript(
        [DummyUtterance(speaker, text) for speaker, text in spoken]
    )

    # Expectations, one entry per utterance, in transcript order.
    expected_terms = [
        ["acute triangle", "add", "addend"],
        ["acute angle"],
        [],
    ]
    expected_matches = [
        {
            "acute triangle": [{"form": "acute triangle", "start": 23, "end": 37}],
            "add": [{"form": "add", "start": 3, "end": 6}],
            "addend": [{"form": "addends", "start": 7, "end": 14}],
        },
        {
            "acute angle": [{"form": "acute angles", "start": 0, "end": 12}],
        },
        {},
    ]

    returned = analyser.run_analysis(transcript)

    assert returned is transcript
    # All term lists are checked first, then all match mappings.
    for position, terms in enumerate(expected_terms):
        assert transcript.utterances[position].vocabulary_terms == terms
    for position, matches in enumerate(expected_matches):
        assert transcript.utterances[position].vocabulary_matches == matches


def test_vocabulary_matches_capture_multiple_occurrences(analyser):
    """Each occurrence of a term is recorded with its own form and span."""
    transcript = DummyTranscript([DummyUtterance("Teacher", "Add adds add.")])

    analyser.run_analysis(transcript)

    analysed = transcript.utterances[0]
    # (form, start, end) for every occurrence, in order of appearance.
    spans = (("add", 0, 3), ("adds", 4, 8), ("add", 9, 12))
    expected = [
        {"form": form, "start": start, "end": end} for form, start, end in spans
    ]
    assert analysed.vocabulary_terms == ["add"]
    assert analysed.vocabulary_matches["add"] == expected