import { AsyncService } from 'civkit/async-service';
import { singleton } from 'tsyringe';

import { PageSnapshot } from './puppeteer';
import { GlobalLogger } from './logger';
import _ from 'lodash';
import { AssertionFailureError } from 'civkit';
import { LLMManager } from '../shared/services/common-llm';
import { JSDomControl } from './jsdom';

// Markdown code-fence delimiter. Kept in a constant so it can be interpolated
// into the prompt template literals below without a raw backtick run appearing
// inside a backtick-delimited string.
const tripleBackTick = '```';

/**
 * Service that turns a page snapshot into LLM-generated text.
 *
 * Every public generator cleans the snapshot HTML via `JSDomControl`, streams a
 * completion from `LLMManager.iterRun`, and yields a copy of the snapshot whose
 * `parsed.textContent` holds the text accumulated so far.
 */
@singleton()
export class LmControl extends AsyncService {

    /**
     * Selector list handed to `cleanHTMLforLMs` as its second argument.
     * NOTE(review): presumably the elements to strip before prompting — confirm
     * against `JSDomControl.cleanHTMLforLMs`.
     */
    protected static readonly lmCleanupSelector = 'script,link,style,textarea,select>option,svg';

    logger = this.globalLogger.child({ service: this.constructor.name });

    constructor(
        protected globalLogger: GlobalLogger,
        protected commonLLM: LLMManager,
        protected jsdomControl: JSDomControl,
    ) {
        super(...arguments);
    }

    /** Waits for injected dependencies, then signals readiness by emitting `ready`. */
    override async init() {
        await this.dependencyReady();

        this.emit('ready');
    }

    /**
     * Builds the options object shared by both `readerlm-v2` calls.
     * A fresh object is returned on every call so the two call sites never
     * share (and cannot accidentally mutate) the same instance.
     */
    protected readerLMOptions() {
        return {
            // system: 'You are an AI assistant developed by VENDOR_NAME',
            stream: true,
            modelSpecific: {
                top_k: 1,
                temperature: 0,
                repetition_penalty: 1.13,
                presence_penalty: 0.25,
                frequency_penalty: 0.25,
                max_tokens: 8192,
            }
        };
    }

    /**
     * Consumes a stream of text chunks and, after each chunk, yields a copy of
     * `snapshot` whose `parsed.textContent` is the concatenation of all chunks
     * received so far. Other `parsed` fields of the snapshot are preserved.
     *
     * @param snapshot - Snapshot to copy into every yielded value.
     * @param textStream - Async stream of text chunks produced by the model.
     */
    protected async* streamTextIntoSnapshot(snapshot: PageSnapshot, textStream: AsyncIterable<string>) {
        // Keep a running string rather than re-joining an ever-growing array on
        // every chunk, which would be quadratic in the total output length.
        let accumulated = '';
        for await (const piece of textStream) {
            accumulated += piece;
            const output: PageSnapshot = {
                ...snapshot,
                parsed: {
                    ...snapshot.parsed,
                    textContent: accumulated,
                }
            };
            yield output;
        }
    }

    /**
     * Streams a Markdown rendition of the page from `vertex-gemini-1.5-flash-002`,
     * prompting it with the cleaned HTML plus the page screenshot.
     *
     * The screenshot is taken from `pageshotUrl` when set, otherwise from
     * `pageshot`; a string value is wrapped in a `URL` before being sent.
     *
     * @param snapshot - Page snapshot, optionally carrying a `pageshotUrl`.
     * @throws AssertionFailureError when the snapshot or its screenshot is missing.
     */
    async* geminiFromBrowserSnapshot(snapshot?: PageSnapshot & {
        pageshotUrl?: string,
    }) {
        const pageshot = snapshot?.pageshotUrl || snapshot?.pageshot;

        // A missing snapshot implies a missing screenshot; checking both lets
        // the compiler narrow `snapshot` for the accesses below.
        if (!snapshot || !pageshot) {
            throw new AssertionFailureError('Screenshot of the page is not available');
        }

        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LmControl.lmCleanupSelector);

        const it = this.commonLLM.iterRun('vertex-gemini-1.5-flash-002', {
            prompt: [
                `HTML: \n${html}\n\nSCREENSHOT: \n`,
                typeof pageshot === 'string' ? new URL(pageshot) : pageshot,
                `Convert this webpage into a markdown source file that does not contain HTML tags, retaining the page language and visual structures.`,
            ],

            options: {
                system: 'You are ReaderLM-v7, a model that generates Markdown source files only. No HTML, notes and chit-chats allowed',
                stream: true
            }
        });

        yield* this.streamTextIntoSnapshot(snapshot, it);

        return;
    }

    /**
     * Streams a Markdown conversion of the page's main content from `readerlm-v2`,
     * prompting it with the cleaned HTML wrapped in an `html` code fence.
     *
     * @param snapshot - Page snapshot whose `html` is converted.
     * @throws AssertionFailureError when no snapshot is provided.
     */
    async* readerLMMarkdownFromSnapshot(snapshot?: PageSnapshot) {
        if (!snapshot) {
            throw new AssertionFailureError('Snapshot of the page is not available');
        }

        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LmControl.lmCleanupSelector);

        const it = this.commonLLM.iterRun('readerlm-v2', {
            prompt: `Extract the main content from the given HTML and convert it to Markdown format.\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n`,

            options: this.readerLMOptions(),
            maxTry: 1,
        });

        yield* this.streamTextIntoSnapshot(snapshot, it);

        return;
    }

    /**
     * Streams an instruction-driven extraction of the page from `readerlm-v2`.
     * The prompt is the instruction, the cleaned HTML in an `html` code fence,
     * and, when `schema` is non-empty, the schema in a `json` code fence.
     *
     * @param schema - Optional JSON schema text appended to the prompt.
     * @param instruction - Instruction placed at the start of the prompt.
     * @param snapshot - Page snapshot whose `html` is processed.
     * @throws AssertionFailureError when no snapshot is provided.
     */
    async* readerLMFromSnapshot(schema?: string, instruction: string = 'Infer useful information from the HTML and present it in a structured JSON object.', snapshot?: PageSnapshot) {
        if (!snapshot) {
            throw new AssertionFailureError('Snapshot of the page is not available');
        }

        const html = await this.jsdomControl.cleanHTMLforLMs(snapshot.html, LmControl.lmCleanupSelector);

        const it = this.commonLLM.iterRun('readerlm-v2', {
            prompt: `${instruction}\n\n${tripleBackTick}html\n${html}\n${tripleBackTick}\n${schema ? `The JSON schema:\n${tripleBackTick}json\n${schema}\n${tripleBackTick}\n` : ''}`,
            options: this.readerLMOptions(),
            maxTry: 1,
        });

        yield* this.streamTextIntoSnapshot(snapshot, it);

        return;
    }
}