Spaces:
Build error
Build error
| ; | |
// Compiler-emitted decorator helper. Reuses `this.__decorate` when one already exists; otherwise defines it here.
// Parameters: `decorators` (array applied last-to-first), `target`, and optionally `key` and `desc`.
// Returns the final value of `r` (see below).
| var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { | |
// `c` is the number of arguments actually passed. With fewer than 3, `r` starts as `target`.
// Otherwise `r` starts as `desc`; when `desc` is exactly null it is first replaced by the own
// property descriptor of `key` on `target`.
| var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; | |
// Delegate to `Reflect.decorate` when the runtime provides it as a function.
| if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); | |
// Fallback: walk the decorators from last to first, skipping falsy entries. Each one is called as
// d(r) when c < 3, d(target, key, r) when c > 3, or d(target, key) when c === 3; a falsy return keeps the previous `r`.
| else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; | |
// When more than 3 arguments were passed and `r` is truthy, define `key` on `target` using `r` before returning `r`.
| return c > 3 && r && Object.defineProperty(target, key, r), r; | |
| }; | |
// Compiler-emitted metadata helper. Reuses `this.__metadata` when one already exists.
// Returns `Reflect.metadata(k, v)` when `Reflect.metadata` is a function; otherwise returns undefined.
| var __metadata = (this && this.__metadata) || function (k, v) { | |
| if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); | |
| }; | |
| var _a; | |
| Object.defineProperty(exports, "__esModule", { value: true }); | |
| exports.LmControl = void 0; | |
| const async_service_1 = require("civkit/async-service"); | |
| const tsyringe_1 = require("tsyringe"); | |
| const logger_1 = require("./logger"); | |
| const civkit_1 = require("civkit"); | |
| const common_llm_1 = require("../shared/services/common-llm"); | |
| const jsdom_1 = require("./jsdom"); | |
| const tripleBackTick = '```'; | |
/**
 * Service that feeds page snapshots to language models through `commonLLM.iterRun`
 * and streams the generated text back as progressively updated snapshots.
 *
 * Every public generator yields a shallow copy of the input snapshot whose
 * `parsed.textContent` holds all text received from the model so far.
 */
let LmControl = class LmControl extends async_service_1.AsyncService {
    /**
     * @param globalLogger  Logger exposing `child()`; a child logger tagged with this service's class name is stored on `this.logger`.
     * @param commonLLM     Object exposing `iterRun(model, request)`.
     * @param jsdomControl  Object exposing `cleanHTMLforLMs(html, selector)`.
     */
    constructor(globalLogger, commonLLM, jsdomControl) {
        // Forward every received argument to the base class unchanged.
        super(...arguments);
        this.globalLogger = globalLogger;
        this.commonLLM = commonLLM;
        this.jsdomControl = jsdomControl;
        this.logger = this.globalLogger.child({ service: this.constructor.name });
    }

    /**
     * Waits for `dependencyReady()` and then emits `'ready'`.
     * Both members are defined outside this class (presumably on AsyncService — confirm).
     */
    async init() {
        await this.dependencyReady();
        this.emit('ready');
    }

    /**
     * Asks the 'vertex-gemini-1.5-flash-002' model to convert a page into Markdown,
     * using both the cleaned HTML and a screenshot of the page.
     *
     * @param snapshot  Page snapshot; needs `html` plus either `pageshotUrl` or `pageshot`.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text streamed so far.
     * @throws {AssertionFailureError} When neither `pageshotUrl` nor `pageshot` is truthy.
     */
    async *geminiFromBrowserSnapshot(snapshot) {
        const pageshot = snapshot?.pageshotUrl || snapshot?.pageshot;
        if (!pageshot) {
            throw new civkit_1.AssertionFailureError('Screenshot of the page is not available');
        }
        const html = await this.cleanSnapshotHtmlForLM(snapshot);
        const it = this.commonLLM.iterRun('vertex-gemini-1.5-flash-002', {
            prompt: [
                `HTML: \n${html}\n\nSCREENSHOT: \n`,
                // A string screenshot reference is wrapped in a URL object; anything else is passed through as-is.
                typeof pageshot === 'string' ? new URL(pageshot) : pageshot,
                `Convert this webpage into a markdown source file that does not contain HTML tags, retaining the page language and visual structures.`,
            ],
            options: {
                system: 'You are ReaderLM-v7, a model that generates Markdown source files only. No HTML, notes and chit-chats allowed',
                stream: true,
            },
        });
        yield* this.yieldSnapshotsWithStreamedText(snapshot, it);
    }

    /**
     * Asks the 'readerlm-v2' model to extract the main content of a page as Markdown.
     *
     * @param snapshot  Page snapshot; its `html` is cleaned and embedded in the prompt.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text streamed so far.
     * @throws {AssertionFailureError} When `snapshot` is falsy.
     */
    async *readerLMMarkdownFromSnapshot(snapshot) {
        if (!snapshot) {
            throw new civkit_1.AssertionFailureError('Snapshot of the page is not available');
        }
        const html = await this.cleanSnapshotHtmlForLM(snapshot);
        const it = this.iterReaderLM(`Extract the main content from the given HTML and convert it to Markdown format.\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n`);
        yield* this.yieldSnapshotsWithStreamedText(snapshot, it);
    }

    /**
     * Asks the 'readerlm-v2' model to follow `instruction` against a page's HTML,
     * optionally constrained by a JSON schema appended to the prompt.
     *
     * @param schema       Optional JSON schema. A string is embedded verbatim; any other truthy
     *                     value is serialised with `JSON.stringify` so it does not degrade to "[object Object]".
     * @param instruction  Prompt text placed before the HTML.
     * @param snapshot     Page snapshot; its `html` is cleaned and embedded in the prompt.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text streamed so far.
     * @throws {AssertionFailureError} When `snapshot` is falsy.
     */
    async *readerLMFromSnapshot(schema, instruction = 'Infer useful information from the HTML and present it in a structured JSON object.', snapshot) {
        if (!snapshot) {
            throw new civkit_1.AssertionFailureError('Snapshot of the page is not available');
        }
        const html = await this.cleanSnapshotHtmlForLM(snapshot);
        let schemaSection = '';
        if (schema) {
            const schemaText = typeof schema === 'string' ? schema : JSON.stringify(schema, null, 2);
            schemaSection = `The JSON schema:\n${tripleBackTick}json\n${schemaText}\n${tripleBackTick}\n`;
        }
        const it = this.iterReaderLM(`${instruction}\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n${schemaSection}`);
        yield* this.yieldSnapshotsWithStreamedText(snapshot, it);
    }

    /**
     * Internal helper: runs `snapshot.html` through `jsdomControl.cleanHTMLforLMs`
     * with the selector list shared by all generators in this class.
     *
     * @param snapshot  Snapshot whose `html` is cleaned.
     * @returns Whatever `cleanHTMLforLMs` returns (awaited by the callers).
     */
    cleanSnapshotHtmlForLM(snapshot) {
        return this.jsdomControl.cleanHTMLforLMs(snapshot.html, 'script,link,style,textarea,select>option,svg');
    }

    /**
     * Internal helper: starts a streaming 'readerlm-v2' run with the sampling
     * options shared by both readerLM generators, limited to a single try.
     *
     * @param prompt  Full prompt text.
     * @returns The iterable returned by `commonLLM.iterRun`.
     */
    iterReaderLM(prompt) {
        return this.commonLLM.iterRun('readerlm-v2', {
            prompt,
            options: {
                stream: true,
                modelSpecific: {
                    top_k: 1,
                    temperature: 0,
                    repetition_penalty: 1.13,
                    presence_penalty: 0.25,
                    frequency_penalty: 0.25,
                    max_tokens: 8192,
                },
            },
            maxTry: 1,
        });
    }

    /**
     * Internal helper: consumes a stream of text chunks and, after each chunk,
     * yields a shallow copy of `snapshot` whose `parsed.textContent` is the
     * concatenation of every chunk received so far.
     *
     * @param snapshot  Snapshot to copy; its existing `parsed` fields are preserved.
     * @param it        Async-iterable of text chunks.
     */
    async *yieldSnapshotsWithStreamedText(snapshot, it) {
        // Grow one string instead of re-joining every earlier chunk on each iteration.
        let textContent = '';
        for await (const chunk of it) {
            // null/undefined chunks contribute nothing, matching Array#join semantics.
            textContent += `${chunk ?? ''}`;
            yield {
                ...snapshot,
                parsed: {
                    ...snapshot?.parsed,
                    textContent,
                },
            };
        }
    }
};
// Export the undecorated class binding first; the next statement overwrites it with the decorated result.
| exports.LmControl = LmControl; | |
// Apply the decorators and rebind both the local `LmControl` and `exports.LmControl` to the value `__decorate` returns.
| exports.LmControl = LmControl = __decorate([ | |
// tsyringe's `singleton()` decorator.
| (0, tsyringe_1.singleton)(), | |
// "design:paramtypes" metadata listing the constructor parameter types. The `_a` expression falls back
// to `Object` when `common_llm_1.LLMManager` is not a function at the time this statement runs.
| __metadata("design:paramtypes", [logger_1.GlobalLogger, typeof (_a = typeof common_llm_1.LLMManager !== "undefined" && common_llm_1.LLMManager) === "function" ? _a : Object, jsdom_1.JSDomControl]) | |
| ], LmControl); | |
| //# sourceMappingURL=lm.js.map |