File size: 2,968 Bytes
d02cfdb
 
a6eaeaf
d02cfdb
 
 
 
 
 
 
 
 
 
 
 
 
a6eaeaf
 
 
 
d02cfdb
 
 
 
 
 
 
 
 
 
 
 
 
 
eff241c
 
8068223
 
 
 
 
 
 
 
a6eaeaf
 
 
 
 
eff241c
 
 
 
a6eaeaf
eff241c
a6eaeaf
 
eff241c
a6eaeaf
 
 
eff241c
a6eaeaf
eff241c
 
 
 
a6eaeaf
 
eff241c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from contextlib import redirect_stdout
from io import StringIO
import json

from calender_en import inference


def test_inference_runs_end_to_end_without_crashing(monkeypatch) -> None:
    """Run ``inference.main()`` with default settings and check its stdout log.

    The expected transcript has six lines: one ``[START]`` header, four
    ``[STEP]`` lines and one ``[END]`` summary.

    Args:
        monkeypatch: pytest fixture used to clear environment overrides so the
            default-configuration assertions hold on any machine.
    """
    # The [START] assertion below pins the default model name, so a MODEL_NAME
    # exported in a developer shell or CI job would make this test fail.
    # API_BASE_URL / API_KEY are cleared as well; presumably they switch
    # main() over to a real API client -- verify against inference.main.
    for variable in ("MODEL_NAME", "API_BASE_URL", "API_KEY"):
        monkeypatch.delenv(variable, raising=False)

    output = StringIO()
    with redirect_stdout(output):
        inference.main()

    lines = output.getvalue().strip().splitlines()
    assert len(lines) == 6
    assert (
        lines[0]
        == "[START] task=smart_calendar_resolution env=calender_en model=deterministic-baseline"
    )
    assert lines[-1] == "[END] success=true steps=4 rewards=1.00,1.50,4.00,3.00"
    # Everything between the header and the summary must be a step record.
    assert all(line.startswith("[STEP]") for line in lines[1:5])


def test_inference_output_is_deterministic() -> None:
    """Two consecutive ``inference.main()`` runs must print identical stdout."""
    transcripts = []
    for _ in range(2):
        buffer = StringIO()
        with redirect_stdout(buffer):
            inference.main()
        transcripts.append(buffer.getvalue())

    run_one, run_two = transcripts
    assert run_one == run_two


def test_inference_reads_model_name_from_environment(monkeypatch) -> None:
    """The ``MODEL_NAME`` environment variable is echoed in the ``[START]`` line."""
    monkeypatch.setenv("MODEL_NAME", "hf-eval-check")
    expected_header = (
        "[START] task=smart_calendar_resolution env=calender_en model=hf-eval-check"
    )

    buffer = StringIO()
    with redirect_stdout(buffer):
        inference.main()

    header = buffer.getvalue().strip().splitlines()[0]
    assert header == expected_header


def test_inference_uses_hackathon_proxy_env_vars(monkeypatch) -> None:
    """Check that proxy settings from the environment reach the OpenAI client.

    ``inference.OpenAI`` is swapped for a stub that records its constructor
    keyword arguments and the keyword arguments of the most recent
    ``chat.completions.create`` call.
    """
    seen: dict[str, object] = {}

    class StubMessage:
        def __init__(self, text: str):
            self.content = text

    class StubChoice:
        def __init__(self, text: str):
            self.message = StubMessage(text)

    class StubResponse:
        def __init__(self, content: str):
            self.choices = [StubChoice(content)]

    class StubCompletions:
        def create(self, **kwargs):
            seen["request"] = kwargs
            # Reply with the "planned_action" found in the JSON content of
            # the second message, re-serialised as JSON.
            second_message = kwargs["messages"][1]["content"]
            planned_action = json.loads(second_message)["planned_action"]
            return StubResponse(json.dumps(planned_action))

    class StubChat:
        completions = StubCompletions()

    class StubOpenAI:
        def __init__(self, *, base_url: str, api_key: str):
            seen["base_url"] = base_url
            seen["api_key"] = api_key
            self.chat = StubChat()

    monkeypatch.setenv("API_BASE_URL", "https://proxy.example.com/v1")
    monkeypatch.setenv("API_KEY", "proxy-test-key")
    monkeypatch.setenv("MODEL_NAME", "meta-hackathon-model")
    monkeypatch.setattr(inference, "OpenAI", StubOpenAI)

    buffer = StringIO()
    with redirect_stdout(buffer):
        inference.main()

    header = buffer.getvalue().strip().splitlines()[0]
    assert header == (
        "[START] task=smart_calendar_resolution env=calender_en "
        "model=meta-hackathon-model"
    )
    assert seen["base_url"] == "https://proxy.example.com/v1"
    assert seen["api_key"] == "proxy-test-key"
    assert seen["request"]["model"] == "meta-hackathon-model"