File size: 4,683 Bytes
494c9e4
c911b05
 
494c9e4
 
c911b05
494c9e4
 
 
 
 
 
 
 
a0b7722
 
 
 
 
494c9e4
 
 
 
 
c911b05
a0b7722
 
 
 
494c9e4
a0b7722
494c9e4
 
 
c911b05
 
 
a0b7722
 
 
 
 
 
494c9e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0b7722
494c9e4
 
 
 
 
 
 
 
 
 
a0b7722
494c9e4
 
 
 
 
 
 
 
 
a0b7722
494c9e4
 
 
c911b05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
 * /api/prediction-attribute 与 /api/tokenize:统一请求与 JSON 解析。
 * 归因缓存规则见 {@link ./attributionResultCache}。
 */
import type { AttributionApiResponse, PredictionAttributeModelVariant } from './attributionResultCache';
import type { PromptTokenSpan } from './genAttributeDagPreprocess';
import {
    entryKey,
    removeCachedEntryByContentKey,
    save,
    takeSuccessfulAttributionFromCache,
} from './attributionResultCache';

// Maximum number of characters from a non-JSON response body that get embedded
// in the thrown error message (longer bodies are truncated with an ellipsis).
const JSON_ERROR_SNIPPET_MAX = 160;
// Identifies which page issued the request; sent to the server as `source_page`.
// NOTE(review): values mirror the HTML entry-point filenames — confirm the
// backend expects these exact strings.
export type PredictionAttributeSourcePage =
    | 'analysis.html'
    | 'chat.html'
    | 'attribution.html'
    | 'gen_attribute.html';

/**
 * POST /api/prediction-attribute and parse the reply as JSON.
 *
 * Optional fields (`target_prediction`, `target_token_id`, `flow_id`,
 * `flow_step`) are included in the request body only when their arguments
 * pass validation; everything else is omitted entirely.
 *
 * @throws Error when the body is not JSON (e.g. a gateway HTML error page),
 *         when the HTTP status is not ok, or when the server reports
 *         `success: false`.
 */
export async function fetchPredictionAttribute(
    apiBaseForRequests: string,
    context: string,
    targetPrediction: string | null,
    model: PredictionAttributeModelVariant,
    sourcePage: PredictionAttributeSourcePage,
    targetTokenId?: number,
    flowId?: string,
    flowStep?: number,
): Promise<AttributionApiResponse> {
    // Both token-index style options share the same validity rule.
    const isIndex = (value: unknown): value is number =>
        typeof value === 'number' && Number.isInteger(value) && value >= 0;

    const payload: Record<string, unknown> = { context, model, source_page: sourcePage };
    if (targetPrediction !== null) payload.target_prediction = targetPrediction;
    if (isIndex(targetTokenId)) payload.target_token_id = targetTokenId;
    if (typeof flowId === 'string' && flowId.length > 0) payload.flow_id = flowId;
    if (isIndex(flowStep)) payload.flow_step = flowStep;

    const res = await fetch(`${apiBaseForRequests}/api/prediction-attribute`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
    });
    const text = await res.text();

    let parsed: AttributionApiResponse & { message?: string };
    try {
        parsed = JSON.parse(text) as AttributionApiResponse & { message?: string };
    } catch {
        // Truncate huge (usually HTML) bodies before embedding them in the error.
        const overflow = text.length > JSON_ERROR_SNIPPET_MAX ? '…' : '';
        const snippet = text.slice(0, JSON_ERROR_SNIPPET_MAX) + overflow;
        throw new Error(
            `Response is not JSON (HTTP ${res.status}). Gateway or proxy may have returned HTML: ${snippet}`
        );
    }
    if (!res.ok) {
        throw new Error(parsed.message ?? `HTTP ${res.status}`);
    }
    if (!parsed.success) {
        throw new Error(parsed.message ?? `Request failed (HTTP ${res.status})`);
    }
    return parsed;
}

// Options for loadPredictionAttributeWithCache.
export type LoadPredictionAttributeWithCacheOptions = {
    /** Base URL prefix for API requests. */
    apiBaseForRequests: string;
    /** Prompt context; also part of the cache entry key. */
    context: string;
    /** Target prediction string; also part of the cache entry key. */
    targetPrediction: string;
    /** Model variant forwarded to the API. */
    model: PredictionAttributeModelVariant;
    /** Originating page, forwarded as `source_page`. */
    sourcePage: PredictionAttributeSourcePage;
    /** Same as the attribution page's "Force retry": delete the cached entry first, then re-request. */
    forceRefresh?: boolean;
};

/**
 * Cache-aware wrapper around {@link fetchPredictionAttribute}.
 *
 * Without `forceRefresh`: a successful cache hit is returned directly
 * (the cache helper touches it); on a miss the API is called and the
 * result saved as 'complete'. With `forceRefresh` the cached entry is
 * evicted first and the request always goes out.
 */
export async function loadPredictionAttributeWithCache(
    options: LoadPredictionAttributeWithCacheOptions
): Promise<AttributionApiResponse> {
    const { apiBaseForRequests, context, targetPrediction, model, sourcePage, forceRefresh } = options;
    if (forceRefresh) {
        // Mirror the attribution page's "Force retry": drop the entry before refetching.
        await removeCachedEntryByContentKey(entryKey(context, targetPrediction));
    } else {
        const cached = await takeSuccessfulAttributionFromCache(context, targetPrediction);
        if (cached) {
            return cached;
        }
    }
    const response = await fetchPredictionAttribute(apiBaseForRequests, context, targetPrediction, model, sourcePage);
    await save({ context, targetPrediction }, response, 'complete');
    return response;
}

/**
 * POST /api/tokenize: fast tokenization; returns offset + raw text for each
 * prompt token. Does not take the inference lock and responds quickly, so it
 * is used to show prompt nodes early while DAG-mode streaming generation runs.
 *
 * @param apiBase Base URL prefix for API requests.
 * @param context Prompt text to tokenize.
 * @param model   Model variant whose tokenizer should be used.
 * @returns The token spans, or `[]` when the server omits `spans`.
 * @throws Error when the response is not JSON, the HTTP status is not ok,
 *         or the server reports `success: false`.
 */
export async function fetchTokenize(
    apiBase: string,
    context: string,
    model: PredictionAttributeModelVariant,
): Promise<PromptTokenSpan[]> {
    // Named instead of two inline 160s; keep in sync with JSON_ERROR_SNIPPET_MAX
    // used by fetchPredictionAttribute for the same truncation.
    const SNIPPET_MAX = 160;
    const res = await fetch(`${apiBase}/api/tokenize`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ context, model }),
    });
    const text = await res.text();
    let json: { success: boolean; spans?: PromptTokenSpan[]; message?: string };
    try {
        json = JSON.parse(text) as typeof json;
    } catch {
        // Truncate non-JSON bodies (e.g. gateway HTML error pages) in the error message.
        const snippet = text.slice(0, SNIPPET_MAX) + (text.length > SNIPPET_MAX ? '…' : '');
        throw new Error(`/api/tokenize response is not JSON (HTTP ${res.status}): ${snippet}`);
    }
    if (!res.ok || !json.success) {
        throw new Error(json.message ?? `HTTP ${res.status}`);
    }
    // Defensive default: a successful response without spans yields an empty list.
    return json.spans ?? [];
}