"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; var _a; Object.defineProperty(exports, "__esModule", { value: true }); exports.LmControl = void 0; const async_service_1 = require("civkit/async-service"); const tsyringe_1 = require("tsyringe"); const logger_1 = require("./logger"); const civkit_1 = require("civkit"); const common_llm_1 = require("../shared/services/common-llm"); const jsdom_1 = require("./jsdom"); const tripleBackTick = '```'; let LmControl = class LmControl extends async_service_1.AsyncService { constructor(globalLogger, commonLLM, jsdomControl) { super(...arguments); this.globalLogger = globalLogger; this.commonLLM = commonLLM; this.jsdomControl = jsdomControl; this.logger = this.globalLogger.child({ service: this.constructor.name }); } async init() { await this.dependencyReady(); this.emit('ready'); } async *geminiFromBrowserSnapshot(snapshot) { const pageshot = snapshot?.pageshotUrl || snapshot?.pageshot; if (!pageshot) { throw new civkit_1.AssertionFailureError('Screenshot of the page is not available'); } const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, 'script,link,style,textarea,select>option,svg'); const it = this.commonLLM.iterRun('vertex-gemini-1.5-flash-002', { prompt: [ `HTML: \n${html}\n\nSCREENSHOT: \n`, typeof pageshot === 'string' ? new URL(pageshot) : pageshot, `Convert this webpage into a markdown source file that does not contain HTML tags, retaining the page language and visual structures.`, ], options: { system: 'You are ReaderLM-v7, a model that generates Markdown source files only. No HTML, notes and chit-chats allowed', stream: true } }); const chunks = []; for await (const txt of it) { chunks.push(txt); const output = { ...snapshot, parsed: { ...snapshot?.parsed, textContent: chunks.join(''), } }; yield output; } return; } async *readerLMMarkdownFromSnapshot(snapshot) { if (!snapshot) { throw new civkit_1.AssertionFailureError('Snapshot of the page is not available'); } const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, 'script,link,style,textarea,select>option,svg'); const it = this.commonLLM.iterRun('readerlm-v2', { prompt: `Extract the main content from the given HTML and convert it to Markdown format.\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n`, options: { // system: 'You are an AI assistant developed by VENDOR_NAME', stream: true, modelSpecific: { top_k: 1, temperature: 0, repetition_penalty: 1.13, presence_penalty: 0.25, frequency_penalty: 0.25, max_tokens: 8192, } }, maxTry: 1, }); const chunks = []; for await (const txt of it) { chunks.push(txt); const output = { ...snapshot, parsed: { ...snapshot?.parsed, textContent: chunks.join(''), } }; yield output; } return; } async *readerLMFromSnapshot(schema, instruction = 'Infer useful information from the HTML and present it in a structured JSON object.', snapshot) { if (!snapshot) { throw new civkit_1.AssertionFailureError('Snapshot of the page is not available'); } const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, 'script,link,style,textarea,select>option,svg'); const it = this.commonLLM.iterRun('readerlm-v2', { prompt: `${instruction}\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n${schema ? `The JSON schema:\n${tripleBackTick}json\n${schema}\n${tripleBackTick}\n` : ''}`, options: { // system: 'You are an AI assistant developed by VENDOR_NAME', stream: true, modelSpecific: { top_k: 1, temperature: 0, repetition_penalty: 1.13, presence_penalty: 0.25, frequency_penalty: 0.25, max_tokens: 8192, } }, maxTry: 1, }); const chunks = []; for await (const txt of it) { chunks.push(txt); const output = { ...snapshot, parsed: { ...snapshot?.parsed, textContent: chunks.join(''), } }; yield output; } return; } }; exports.LmControl = LmControl; exports.LmControl = LmControl = __decorate([ (0, tsyringe_1.singleton)(), __metadata("design:paramtypes", [logger_1.GlobalLogger, typeof (_a = typeof common_llm_1.LLMManager !== "undefined" && common_llm_1.LLMManager) === "function" ? _a : Object, jsdom_1.JSDomControl]) ], LmControl); //# sourceMappingURL=lm.js.map