// Source: InfoLens/client/src/ts/controllers/semanticSearchController.ts
// (initial beta release, commit 494c9e4, author dqy08)
/**
 * Semantic search controller.
 * Executes semantic analysis in whole-text or chunked mode.
 */
import * as d3 from 'd3';
import type { TextAnalysisAPI } from '../api/GLTR_API';
import { isSemanticFromCache } from '../api/GLTR_API';
import type { AppStateManager } from '../utils/appStateManager';
import type { VisualizationUpdater } from '../utils/visualizationUpdater';
import type { GLTR_Text_Box } from '../vis/GLTR_Text_Box';
import { SEMANTIC_CHUNK_BYTES } from '../constants';
import { getSemanticMatchThreshold } from '../utils/semanticThresholdManager';
import { getDigitsMergeEnabled } from '../utils/digitsMergeManager';
import {
getAttentionRawScore,
mergeAttentionTokensFullyForRendering,
normalizeTokenScores,
splitTextToChunks,
} from '../utils/semanticUtils';
import type { signalFitResult } from '../utils/signalThresholdDetector';
/**
 * Dependency bundle injected into SemanticSearchController.
 * Keeps the controller decoupled from the concrete UI / API / state objects.
 */
export interface SemanticSearchControllerDeps {
  /** Returns the current semantic query string ('' aborts the run early). */
  getQuery: () => string;
  /** Returns the text to analyze ('' triggers an error toast). */
  getText: () => string;
  /** Returns the optional analysis submode forwarded to the API. */
  getSubmode: () => string | undefined;
  /** True when chunked (per-chunk) analysis should be used instead of whole-text. */
  isChunkedMode: () => boolean;
  /** Backend API used for analyzeSemantic requests. */
  api: TextAnalysisAPI;
  /** Global app state; the controller toggles its "semantic searching" flag. */
  appStateManager: AppStateManager;
  /** Renders semantic responses and histogram state into the visualization. */
  visualizationUpdater: VisualizationUpdater;
  /** Text box widget; receives plain text and loading-state updates. */
  lmf: GLTR_Text_Box;
  /** Shows a transient toast message to the user. */
  showToast: (message: string, type: 'success' | 'error') => void;
  /** Shows the semantic-specific error UI (message optional). */
  showSemanticError: (message?: string) => void;
  /** Invoked once when a search begins, with the query being run. */
  onSearchStart: (query: string) => void;
  /** Invoked on successful completion with the match degree (null if unknown) and cache origin. */
  finishSemanticSearch: (query: string, matchDegree: number | null, fromCache: boolean) => void;
  /** Translates a UI string key to the current locale. */
  tr: (key: string) => string;
  /** Extracts a human-readable message from an unknown error, with a fallback. */
  extractErrorMessage: (err: unknown, fallback: string) => string;
}
/**
 * Semantic search controller.
 *
 * Runs semantic analysis over the current text in either whole-text mode or
 * chunked mode, driving the progress UI, abort handling, and result delivery
 * through the injected {@link SemanticSearchControllerDeps}.
 */
export class SemanticSearchController {
  private deps: SemanticSearchControllerDeps;
  // Non-null only while a search is in flight; abort() cancels it.
  private abortController: AbortController | null = null;

  constructor(deps: SemanticSearchControllerDeps) {
    this.deps = deps;
  }

  /** Cancels the in-flight semantic search, if any. */
  abort(): void {
    this.abortController?.abort();
  }

  /** Entry point: dispatches to chunked or whole-text analysis based on the current mode. */
  run(): void {
    void this.runSemanticSearchBase(async ({ query, text, submode, signal }) => {
      if (this.deps.isChunkedMode()) {
        await this.runChunked({ query, text, submode, signal });
      } else {
        await this.runWhole({ query, text, submode, signal });
      }
    });
  }

  /**
   * Shared wrapper for both analysis modes: validates inputs, installs the
   * AbortController, toggles the loading UI, and maps failures to a toast.
   *
   * @param execute Mode-specific analysis routine, given the validated inputs.
   */
  private async runSemanticSearchBase(
    execute: (params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }) => Promise<void>
  ): Promise<void> {
    const query = this.deps.getQuery();
    if (!query) return;
    const text = this.deps.getText();
    if (!text) {
      this.deps.showToast(this.deps.tr('Please enter text first'), 'error');
      return;
    }
    this.abortController = new AbortController();
    const signal = this.abortController.signal;
    this.deps.onSearchStart(query);
    try {
      this.deps.appStateManager.setSemanticSearching(true);
      // Put the result area into a "pending" state before the request starts.
      d3.select('#semantic_match_degree').style('display', 'none');
      d3.select('#semantic_search_loader').style('visibility', 'visible');
      d3.select('#all_result').style('opacity', 1).style('display', null);
      this.deps.lmf.setTextOnly(text);
      this.deps.visualizationUpdater.updateHistogramVisibilityForPending('semantic', text, this.deps.isChunkedMode());
      await execute({ query, text, submode: this.deps.getSubmode(), signal });
    } catch (err) {
      if (err instanceof Error && err.name === 'AbortError') {
        // User-initiated cancellation: restore the view quietly, no toast.
        this.deps.lmf.hideLoading();
        this.deps.visualizationUpdater.rerenderHistograms();
        return;
      }
      this.deps.showToast(
        this.deps.extractErrorMessage(err, this.deps.tr('Semantic analysis failed')),
        'error'
      );
      this.deps.lmf.hideLoading();
      this.deps.visualizationUpdater.rerenderHistograms();
    } finally {
      this.abortController = null;
      this.deps.appStateManager.setSemanticSearching(false);
      d3.select('#semantic_search_loader').style('visibility', 'hidden');
    }
  }

  /** Whole-text mode: one request analyzes the full text, with step-wise progress UI. */
  private async runWhole(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> {
    const { query, text, submode, signal } = params;
    const onProgress = (step: number, totalSteps: number, stage: string, percentage?: number) => {
      const progressText = percentage !== undefined && percentage !== null
        ? `Step ${step}/${totalSteps}:\t ${stage} ${percentage}%`
        : `Step ${step}/${totalSteps}:\t ${stage}`;
      d3.select('#semantic_progress').text(progressText).style('display', 'inline-block');
    };
    const res = await this.deps.api.analyzeSemantic(query, text, { onProgress, submode, debug_info: true, signal });
    if (res?.success && res?.token_attention) {
      this.deps.visualizationUpdater.handleSemanticResponse(res, text);
      const md = res?.full_match_degree;
      // typeof check alone suffices: typeof null === 'object', so the extra
      // `md != null &&` guard in the original was redundant.
      this.deps.finishSemanticSearch(query, typeof md === 'number' ? md : null, isSemanticFromCache(res));
    } else {
      this.deps.showSemanticError(res?.message);
    }
  }

  /**
   * Chunked mode: splits the text into byte-bounded chunks, analyzes each
   * sequentially, and re-renders incrementally starting from the first
   * non-cached chunk.
   */
  private async runChunked(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> {
    const { query, text, submode, signal } = params;
    const chunks = splitTextToChunks(text, SEMANTIC_CHUNK_BYTES);
    if (chunks.length === 0) {
      // Nothing to analyze; render an empty result and finish immediately.
      this.deps.visualizationUpdater.handleSemanticResponse({ token_attention: [] }, text, undefined);
      this.deps.finishSemanticSearch(query, null, true);
      return;
    }
    // Each chunk's tokens arrive already overlap/digit-merged and normalized;
    // here we only shift offsets and concatenate — no further merge/normalize
    // pass is done over the full text.
    const allChunkProcessedTokens: Array<{
      offset: [number, number];
      raw: string;
      score: number;
      rawScore?: number;
    }> = [];
    const chunkInfos: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }> = [];
    let maxMatchDegree = 0;
    let allFromCache = true;
    let aborted = false;
    let lastChunkFromCache = false;
    for (let i = 0; i < chunks.length; i++) {
      if (signal.aborted) {
        // Bug fix: the original did a bare `break` here, leaving `aborted`
        // false, so the post-loop code finalized (and could render) a partial
        // result even after the user cancelled between chunks.
        aborted = true;
        break;
      }
      d3.select('#semantic_progress').text(`Chunk ${i + 1}/${chunks.length}`).style('display', 'inline-block');
      const res = await this.deps.api.analyzeSemantic(query, chunks[i].text, { submode, signal });
      // NOTE:
      // Whether a chunk is served from semantic cache can only be determined after
      // this request returns (frontend can't pre-check cache hit before `res`).
      // Therefore, chunk-related UI rendering (histogram / chunkLines / semantic token coloring)
      // is intentionally gated and starts from the first non-cached chunk.
      if (signal.aborted) {
        aborted = true;
        break;
      }
      if (!res?.success) {
        this.deps.showSemanticError(res?.message);
        aborted = true;
        break;
      }
      lastChunkFromCache = isSemanticFromCache(res);
      if (!lastChunkFromCache) allFromCache = false;
      const matchDegree = res.full_match_degree ?? 0;
      maxMatchDegree = Math.max(maxMatchDegree, matchDegree);
      const matched = matchDegree >= getSemanticMatchThreshold();
      const merged = mergeAttentionTokensFullyForRendering(res.token_attention ?? [], chunks[i].text, {
        digitMerge: getDigitsMergeEnabled(),
      });
      const normalized = normalizeTokenScores(merged);
      // Below-threshold chunks keep their raw scores for debugging but render
      // with score 0 (no highlight).
      const tokens = matched
        ? normalized
        : normalized.map((t) => ({ ...t, rawScore: getAttentionRawScore(t), score: 0 }));
      chunkInfos.push({
        startOffset: chunks[i].startOffset,
        endOffset: chunks[i].startOffset + chunks[i].text.length,
        chunkIndex: i,
        chunkMatchDegree: matchDegree,
      });
      // Shift chunk-local offsets into full-text coordinates.
      const tokensOffsetAdjusted = tokens.map(t => ({
        ...t,
        offset: [t.offset[0] + chunks[i].startOffset, t.offset[1] + chunks[i].startOffset] as [number, number],
      }));
      allChunkProcessedTokens.push(...tokensOffsetAdjusted);
      if (!lastChunkFromCache) {
        // Incremental re-render with everything accumulated so far.
        if (!this.deps.visualizationUpdater.handleSemanticResponse(
          { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined },
          text,
          undefined
        )) {
          aborted = true;
          this.deps.showSemanticError();
          break;
        }
      }
    }
    if (!aborted) {
      if (lastChunkFromCache) {
        // The final chunk came from cache, so the incremental render above was
        // skipped for it; render the complete accumulated result once here.
        this.deps.visualizationUpdater.handleSemanticResponse(
          { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined },
          text,
          undefined
        );
      }
      this.deps.finishSemanticSearch(query, maxMatchDegree, allFromCache);
    }
  }
}