// Mohammad Shahid
// Include pre-built files for HF deployment
// f316cce
"use strict";
/**
 * Decorator-application helper.
 *
 * Reuses `this.__decorate` when `this` is truthy and already carries one; otherwise defines it here.
 *
 * @param decorators Array of decorator functions; falsy entries are skipped.
 * @param target     The object being decorated (passed alone for the two-argument form).
 * @param key        Property key; only supplied in the three/four-argument forms.
 * @param desc       Property descriptor; `null` means "look it up on `target`".
 * @returns The final decorated value or descriptor (`r`).
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
// c: number of arguments actually passed, which selects the calling convention below.
// r: running result. It starts as `target` when fewer than three arguments were passed;
//    otherwise it is `desc`, and when `desc` is exactly null the own property descriptor
//    of `target[key]` is fetched (and also stored back into `desc`).
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
// Delegate to Reflect.decorate when the runtime provides it as a function.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
// Otherwise apply the decorators from last to first. Each one is called as d(r) when c < 3,
// d(target, key, r) when c > 3, and d(target, key) when c is exactly 3; a falsy return
// value keeps the previous `r`.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
// With more than three arguments and a truthy `r`, define `r` as the property on `target`;
// the comma expression then returns `r` in every case.
return c > 3 && r && Object.defineProperty(target, key, r), r;
};
/**
 * Metadata-decorator helper.
 *
 * Reuses `this.__metadata` when `this` is truthy and already carries one. Otherwise the
 * fallback forwards to `Reflect.metadata` when that is available as a function and
 * returns `undefined` when it is not.
 *
 * @param metadataKey   Key passed through to `Reflect.metadata`.
 * @param metadataValue Value passed through to `Reflect.metadata`.
 * @returns Whatever `Reflect.metadata` returns, or `undefined` without it.
 */
var __metadata = (this && this.__metadata) || function (metadataKey, metadataValue) {
    const canDelegate = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return canDelegate ? Reflect.metadata(metadataKey, metadataValue) : undefined;
};
// Scratch variable used by the "design:paramtypes" metadata expression near the end of the file.
var _a;
// Flag this CommonJS module as an ES-module interop target.
Object.defineProperty(exports, "__esModule", { value: true });
// Export slot is initialised to undefined here and assigned once the class has been defined.
exports.LmControl = void 0;
const async_service_1 = require("civkit/async-service");
const tsyringe_1 = require("tsyringe");
const logger_1 = require("./logger");
const civkit_1 = require("civkit");
const common_llm_1 = require("../shared/services/common-llm");
const jsdom_1 = require("./jsdom");
// Markdown code-fence delimiter, interpolated into the prompts built in this file.
const tripleBackTick = '```';
/** Selector list handed to `cleanHTMLforLMs` for every model call in this file. */
const LM_CLEANUP_SELECTORS = 'script,link,style,textarea,select>option,svg';

/**
 * Builds the request settings shared by both 'readerlm-v2' calls.
 * A fresh object is returned on every call so the two call sites never share one instance.
 */
function readerLMRunSettings() {
    return {
        options: {
            // system: 'You are an AI assistant developed by VENDOR_NAME',
            stream: true,
            modelSpecific: {
                top_k: 1,
                temperature: 0,
                repetition_penalty: 1.13,
                presence_penalty: 0.25,
                frequency_penalty: 0.25,
                max_tokens: 8192,
            },
        },
        maxTry: 1,
    };
}

/**
 * Turns a stream of text chunks into a stream of snapshot copies.
 *
 * For every chunk received, yields a shallow copy of `snapshot` whose
 * `parsed.textContent` holds all text received so far.
 *
 * The text is kept as a running string instead of re-joining every chunk on each
 * iteration, so the work per chunk no longer grows with the number of chunks.
 *
 * @param snapshot   Non-nullish snapshot object to copy for each emission.
 * @param textStream Async iterable of text chunks.
 */
async function* streamTextContent(snapshot, textStream) {
    let textContent = '';
    for await (const chunk of textStream) {
        // Same per-element conversion as Array.prototype.join(''):
        // null/undefined contribute nothing, everything else is string-converted.
        textContent += chunk == null ? '' : `${chunk}`;
        yield {
            ...snapshot,
            parsed: {
                ...snapshot.parsed,
                textContent,
            },
        };
    }
}

/**
 * Service that sends page snapshots to language models through `commonLLM.iterRun`
 * and streams the generated text back as updated snapshot copies.
 */
let LmControl = class LmControl extends async_service_1.AsyncService {
    /**
     * @param globalLogger Logger whose `child()` produces this service's logger.
     * @param commonLLM    Object exposing `iterRun(model, request)`.
     * @param jsdomControl Object exposing `cleanHTMLforLMs(html, selectors)`.
     */
    constructor(globalLogger, commonLLM, jsdomControl) {
        // Every constructor argument is forwarded to the base class unchanged.
        super(...arguments);
        this.globalLogger = globalLogger;
        this.commonLLM = commonLLM;
        this.jsdomControl = jsdomControl;
        this.logger = this.globalLogger.child({ service: this.constructor.name });
    }

    /** Waits for `dependencyReady()` and then emits 'ready'. */
    async init() {
        await this.dependencyReady();
        this.emit('ready');
    }

    /**
     * Asks 'vertex-gemini-1.5-flash-002' to convert a page into Markdown, using the
     * cleaned HTML together with the page screenshot.
     *
     * @param snapshot Page snapshot; must provide `pageshotUrl` or `pageshot`, plus `html`.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text generated so far.
     * @throws civkit AssertionFailureError when no screenshot is available
     *         (including when `snapshot` itself is nullish).
     */
    async *geminiFromBrowserSnapshot(snapshot) {
        const pageshot = snapshot?.pageshotUrl || snapshot?.pageshot;
        if (!pageshot) {
            throw new civkit_1.AssertionFailureError('Screenshot of the page is not available');
        }
        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LM_CLEANUP_SELECTORS);
        const it = this.commonLLM.iterRun('vertex-gemini-1.5-flash-002', {
            prompt: [
                `HTML: \n${html}\n\nSCREENSHOT: \n`,
                // A string screenshot reference is wrapped in a URL object; anything else is passed as-is.
                typeof pageshot === 'string' ? new URL(pageshot) : pageshot,
                `Convert this webpage into a markdown source file that does not contain HTML tags, retaining the page language and visual structures.`,
            ],
            options: {
                system: 'You are ReaderLM-v7, a model that generates Markdown source files only. No HTML, notes and chit-chats allowed',
                stream: true
            }
        });
        yield* streamTextContent(snapshot, it);
    }

    /**
     * Asks 'readerlm-v2' to extract the main content of the page HTML as Markdown.
     *
     * @param snapshot Page snapshot providing `html`.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text generated so far.
     * @throws civkit AssertionFailureError when `snapshot` is falsy.
     */
    async *readerLMMarkdownFromSnapshot(snapshot) {
        if (!snapshot) {
            throw new civkit_1.AssertionFailureError('Snapshot of the page is not available');
        }
        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LM_CLEANUP_SELECTORS);
        const it = this.commonLLM.iterRun('readerlm-v2', {
            prompt: `Extract the main content from the given HTML and convert it to Markdown format.\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n`,
            ...readerLMRunSettings(),
        });
        yield* streamTextContent(snapshot, it);
    }

    /**
     * Asks 'readerlm-v2' to follow `instruction` against the page HTML, optionally
     * appending a JSON schema section to the prompt.
     *
     * @param schema      Schema text interpolated into the prompt; the schema section is
     *                    omitted when this is falsy.
     * @param instruction Instruction placed at the start of the prompt.
     * @param snapshot    Page snapshot providing `html`.
     * @yields A copy of `snapshot` with `parsed.textContent` set to the text generated so far.
     * @throws civkit AssertionFailureError when `snapshot` is falsy.
     */
    async *readerLMFromSnapshot(schema, instruction = 'Infer useful information from the HTML and present it in a structured JSON object.', snapshot) {
        if (!snapshot) {
            throw new civkit_1.AssertionFailureError('Snapshot of the page is not available');
        }
        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LM_CLEANUP_SELECTORS);
        const it = this.commonLLM.iterRun('readerlm-v2', {
            prompt: `${instruction}\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n${schema ? `The JSON schema:\n${tripleBackTick}json\n${schema}\n${tripleBackTick}\n` : ''}`,
            ...readerLMRunSettings(),
        });
        yield* streamTextContent(snapshot, it);
    }
};
// Publish the class first, then replace both the export and the local binding with
// whatever the decorators return.
exports.LmControl = LmControl;
exports.LmControl = LmControl = __decorate([
// Class decorator produced by tsyringe's `singleton()`.
(0, tsyringe_1.singleton)(),
// Constructor parameter types: GlobalLogger, then LLMManager when it resolves to a function
// at runtime (otherwise the generic `Object`), then JSDomControl.
__metadata("design:paramtypes", [logger_1.GlobalLogger, typeof (_a = typeof common_llm_1.LLMManager !== "undefined" && common_llm_1.LLMManager) === "function" ? _a : Object, jsdom_1.JSDomControl])
], LmControl);
//# sourceMappingURL=lm.js.map