Spaces:

Scribbler310
/

gs-port

Sleeping

App Files Files Community

gs-port / node_modules /@langchain /openai /dist /tools /computerUse.d.cts

Scribbler310

feat: enhance dashboard

c2b7eb3 verified 14 days ago

raw

history blame contribute delete

8.46 kB

	import { OpenAI } from "openai";
	import { ToolMessage } from "@langchain/core/messages";
	import { DynamicStructuredTool, ToolRuntime } from "@langchain/core/tools";
	import { z } from "zod/v4";

	//#region src/tools/computerUse.d.ts
	/**
	* The type of computer environment to control.
	*
	* One of the string literals `"browser"`, `"mac"`, `"windows"`, `"linux"`
	* or `"ubuntu"`.
	*/
	type ComputerUseEnvironment = "browser" | "mac" | "windows" | "linux" | "ubuntu";
	/**
	* Re-export action types from OpenAI SDK for convenience.
	*
	* Each alias below names one member type of the OpenAI SDK's
	* `OpenAI.Responses.ResponseComputerToolCall` namespace; no shape is
	* redefined here.
	*/
	type ComputerUseClickAction = OpenAI.Responses.ResponseComputerToolCall.Click;
	/** Alias of the SDK's `ResponseComputerToolCall.DoubleClick` type. */
	type ComputerUseDoubleClickAction = OpenAI.Responses.ResponseComputerToolCall.DoubleClick;
	/** Alias of the SDK's `ResponseComputerToolCall.Drag` type. */
	type ComputerUseDragAction = OpenAI.Responses.ResponseComputerToolCall.Drag;
	/** Alias of the SDK's `ResponseComputerToolCall.Keypress` type. */
	type ComputerUseKeypressAction = OpenAI.Responses.ResponseComputerToolCall.Keypress;
	/** Alias of the SDK's `ResponseComputerToolCall.Move` type. */
	type ComputerUseMoveAction = OpenAI.Responses.ResponseComputerToolCall.Move;
	/** Alias of the SDK's `ResponseComputerToolCall.Screenshot` type. */
	type ComputerUseScreenshotAction = OpenAI.Responses.ResponseComputerToolCall.Screenshot;
	/** Alias of the SDK's `ResponseComputerToolCall.Scroll` type. */
	type ComputerUseScrollAction = OpenAI.Responses.ResponseComputerToolCall.Scroll;
	/** Alias of the SDK's `ResponseComputerToolCall.Type` type. */
	type ComputerUseTypeAction = OpenAI.Responses.ResponseComputerToolCall.Type;
	/** Alias of the SDK's `ResponseComputerToolCall.Wait` type. */
	type ComputerUseWaitAction = OpenAI.Responses.ResponseComputerToolCall.Wait;
	/**
	* Union type of all computer use actions from OpenAI SDK.
	*
	* Obtained by indexing the `action` property of
	* `OpenAI.Responses.ResponseComputerToolCall`, so its members are whatever
	* the installed SDK version declares for that property.
	*/
	type ComputerUseAction = OpenAI.Responses.ResponseComputerToolCall["action"];
	/**
	* Zod (v4) schema type for the tool input: an object with a single `action`
	* property whose value is a union of nine object variants, each identified
	* by a literal `type` field.
	*
	* The return type of `computerUse` refers to this constant through
	* `typeof ComputerUseActionSchema`; the constant itself is not in this
	* module's export list.
	*
	* NOTE(review): every object is parameterised with `z.core.$strip`, which
	* presumably means unrecognised keys are dropped when parsing — confirm
	* against the zod v4 documentation.
	*/
	declare const ComputerUseActionSchema: z.ZodObject<{
	action: z.ZodUnion<readonly [z.ZodObject<{
	// "screenshot": no fields besides `type`.
	type: z.ZodLiteral<"screenshot">;
	}, z.core.$strip>, z.ZodObject<{
	// "click": numeric x/y plus a `button` enum that carries a default.
	type: z.ZodLiteral<"click">;
	x: z.ZodNumber;
	y: z.ZodNumber;
	button: z.ZodDefault<z.ZodEnum<{
	back: "back";
	forward: "forward";
	left: "left";
	right: "right";
	wheel: "wheel";
	}>>;
	}, z.core.$strip>, z.ZodObject<{
	// "double_click": same fields as "click".
	type: z.ZodLiteral<"double_click">;
	x: z.ZodNumber;
	y: z.ZodNumber;
	button: z.ZodDefault<z.ZodEnum<{
	back: "back";
	forward: "forward";
	left: "left";
	right: "right";
	wheel: "wheel";
	}>>;
	}, z.core.$strip>, z.ZodObject<{
	// "drag": `path` is an array of { x, y } number pairs.
	type: z.ZodLiteral<"drag">;
	path: z.ZodArray<z.ZodObject<{
	x: z.ZodNumber;
	y: z.ZodNumber;
	}, z.core.$strip>>;
	}, z.core.$strip>, z.ZodObject<{
	// "keypress": `keys` is an array of strings.
	type: z.ZodLiteral<"keypress">;
	keys: z.ZodArray<z.ZodString>;
	}, z.core.$strip>, z.ZodObject<{
	// "move": numeric x/y only.
	type: z.ZodLiteral<"move">;
	x: z.ZodNumber;
	y: z.ZodNumber;
	}, z.core.$strip>, z.ZodObject<{
	// "scroll": numeric x/y plus numeric scroll_x/scroll_y.
	type: z.ZodLiteral<"scroll">;
	x: z.ZodNumber;
	y: z.ZodNumber;
	scroll_x: z.ZodNumber;
	scroll_y: z.ZodNumber;
	}, z.core.$strip>, z.ZodObject<{
	// "type": `text` is a string.
	type: z.ZodLiteral<"type">;
	text: z.ZodString;
	}, z.core.$strip>, z.ZodObject<{
	// "wait": `duration` is an optional number (unit not stated here).
	type: z.ZodLiteral<"wait">;
	duration: z.ZodOptional<z.ZodNumber>;
	}, z.core.$strip>]>;
	}, z.core.$strip>;
	/**
	* Input structure for the Computer Use tool.
	* The action is wrapped in an `action` property.
	*
	* Used as the input type parameter of the `DynamicStructuredTool` returned
	* by `computerUse`.
	*/
	interface ComputerUseInput {
	/** The computer action to perform, typed as the OpenAI SDK action union. */
	action: ComputerUseAction;
	}
	/**
	* Value an `execute` callback may produce: a string or a `ToolMessage`,
	* returned either directly or wrapped in a `Promise`.
	*/
	type ComputerUseReturnType = string | Promise<string> | ToolMessage<any> | Promise<ToolMessage<any>>;
	/**
	* Options for the Computer Use tool.
	*
	* All four properties are declared as required.
	*/
	interface ComputerUseOptions {
	/**
	* The width of the computer display in pixels.
	*/
	displayWidth: number;
	/**
	* The height of the computer display in pixels.
	*/
	displayHeight: number;
	/**
	* The type of computer environment to control.
	* - `browser`: Browser automation (recommended for most use cases)
	* - `mac`: macOS environment
	* - `windows`: Windows environment
	* - `linux`: Linux environment
	* - `ubuntu`: Ubuntu environment
	*/
	environment: ComputerUseEnvironment;
	/**
	* Execute function that handles computer action execution.
	* This function receives the action input and should return a base64-encoded
	* screenshot of the result.
	*
	* It is called with the action and a `ToolRuntime` value. Per its declared
	* return type it may return a string or a `ToolMessage`, synchronously or
	* as a `Promise`.
	*/
	execute: (action: ComputerUseAction, runtime: ToolRuntime<any, any>) => ComputerUseReturnType;
	}
	/**
	* OpenAI Computer Use tool type for the Responses API.
	*
	* Alias of the OpenAI SDK's `OpenAI.Responses.ComputerUsePreviewTool`.
	*/
	type ComputerUseTool = OpenAI.Responses.ComputerUsePreviewTool;
	/**
	* Creates a Computer Use tool that allows models to control computer interfaces
	* and perform tasks by simulating mouse clicks, keyboard input, scrolling, and more.
	*
	* Computer Use is a practical application of OpenAI's Computer-Using Agent (CUA)
	* model (`computer-use-preview`), which combines vision capabilities with advanced
	* reasoning to simulate controlling computer interfaces.
	*
	* How it works:
	* The tool operates in a continuous loop:
	* 1. Model sends computer actions (click, type, scroll, etc.)
	* 2. Your code executes these actions in a controlled environment
	* 3. You capture a screenshot of the result
	* 4. Send the screenshot back to the model
	* 5. Repeat until the task is complete
	*
	* Important: Computer use is in beta and requires careful consideration:
	* - Use in sandboxed environments only
	* - Do not use for high-stakes or authenticated tasks
	* - Always implement human-in-the-loop for important decisions
	* - Handle safety checks appropriately
	*
	* @see {@link https://platform.openai.com/docs/guides/tools-computer-use | OpenAI Computer Use Documentation}
	*
	* @param options - Configuration options for the Computer Use tool
	* (display size, environment and the `execute` callback)
	* @returns A Computer Use tool that can be passed to `bindTools`; typed as a
	* `DynamicStructuredTool` whose schema is `ComputerUseActionSchema` and whose
	* input is `ComputerUseInput`
	*
	* @example
	* ```typescript
	* import { ChatOpenAI, tools } from "@langchain/openai";
	*
	* const model = new ChatOpenAI({ model: "computer-use-preview" });
	*
	* // With execute callback for automatic action handling
	* const computer = tools.computerUse({
	*   displayWidth: 1024,
	*   displayHeight: 768,
	*   environment: "browser",
	*   execute: async (action) => {
	*     if (action.type === "screenshot") {
	*       return captureScreenshot();
	*     }
	*     if (action.type === "click") {
	*       await page.mouse.click(action.x, action.y, { button: action.button });
	*       return captureScreenshot();
	*     }
	*     if (action.type === "type") {
	*       await page.keyboard.type(action.text);
	*       return captureScreenshot();
	*     }
	*     // Handle other actions...
	*     return captureScreenshot();
	*   },
	* });
	*
	* const llmWithComputer = model.bindTools([computer]);
	* const response = await llmWithComputer.invoke(
	*   "Check the latest news on bing.com"
	* );
	* ```
	*
	* @example
	* ```typescript
	* // Without execute callback (manual action handling)
	* // NOTE(review): `ComputerUseOptions.execute` is declared as a required
	* // property, so this call would not type-check as written — confirm whether
	* // `execute` is meant to be optional.
	* const computer = tools.computerUse({
	*   displayWidth: 1024,
	*   displayHeight: 768,
	*   environment: "browser",
	* });
	*
	* const response = await model.invoke("Check the news", {
	*   tools: [computer],
	* });
	*
	* // Access the computer call from the response
	* const computerCall = response.additional_kwargs.tool_outputs?.find(
	*   (output) => output.type === "computer_call"
	* );
	* if (computerCall) {
	*   console.log("Action to execute:", computerCall.action);
	*   // Execute the action manually, then send back a screenshot
	* }
	* ```
	*
	* @example
	* ```typescript
	* // For macOS desktop automation with Docker
	* // NOTE(review): the command below uses `xdotool` with an X11 `DISPLAY`;
	* // confirm this matches the intended "mac" environment.
	* const computer = tools.computerUse({
	*   displayWidth: 1920,
	*   displayHeight: 1080,
	*   environment: "mac",
	*   execute: async (action) => {
	*     if (action.type === "click") {
	*       await dockerExec(
	*         `DISPLAY=:99 xdotool mousemove ${action.x} ${action.y} click 1`,
	*         containerName
	*       );
	*     }
	*     // Capture screenshot from container
	*     return await getDockerScreenshot(containerName);
	*   },
	* });
	* ```
	*
	* @remarks
	* - Only available through the Responses API (not Chat Completions)
	* - Requires `computer-use-preview` model
	* - Actions include: click, double_click, drag, keypress, move, screenshot, scroll, type, wait
	* - Safety checks may be returned that require acknowledgment before proceeding
	* - Use `truncation: "auto"` parameter when making requests
	* - Recommended to use with `reasoning.summary` for debugging
	*/
	declare function computerUse(options: ComputerUseOptions): DynamicStructuredTool<typeof ComputerUseActionSchema, ComputerUseInput, unknown, ComputerUseReturnType>;
	//#endregion
	// Public surface of this declaration module: the action/option types and
	// the `computerUse` factory function.
	export {
		ComputerUseAction,
		ComputerUseClickAction,
		ComputerUseDoubleClickAction,
		ComputerUseDragAction,
		ComputerUseEnvironment,
		ComputerUseInput,
		ComputerUseKeypressAction,
		ComputerUseMoveAction,
		ComputerUseOptions,
		ComputerUseScreenshotAction,
		ComputerUseScrollAction,
		ComputerUseTool,
		ComputerUseTypeAction,
		ComputerUseWaitAction,
		computerUse,
	};
	//# sourceMappingURL=computerUse.d.cts.map