Spaces:
Sleeping
Sleeping
| import { OpenAI } from "openai"; | |
| import { ToolMessage } from "@langchain/core/messages"; | |
| import { DynamicStructuredTool, ToolRuntime } from "@langchain/core/tools"; | |
| import { z } from "zod/v4"; | |
| //#region src/tools/computerUse.d.ts | |
| /** | |
| * The type of computer environment to control. | |
| * | |
| * Exactly one of the string literals `"browser"`, `"mac"`, `"windows"`, | |
| * `"linux"`, or `"ubuntu"`. It is the type of the `environment` field of | |
| * `ComputerUseOptions`. | |
| */ | |
| type ComputerUseEnvironment = "browser" | "mac" | "windows" | "linux" | "ubuntu"; | |
| /** | |
| * Re-export action types from OpenAI SDK for convenience. | |
| * | |
| * Each alias below names one member of the | |
| * `OpenAI.Responses.ResponseComputerToolCall` namespace (`Click`, | |
| * `DoubleClick`, `Drag`, `Keypress`, `Move`, `Screenshot`, `Scroll`, `Type`, | |
| * `Wait`) so an individual action shape can be referenced without spelling | |
| * out the full SDK path. The aliases add no fields of their own. | |
| */ | |
| type ComputerUseClickAction = OpenAI.Responses.ResponseComputerToolCall.Click; | |
| type ComputerUseDoubleClickAction = OpenAI.Responses.ResponseComputerToolCall.DoubleClick; | |
| type ComputerUseDragAction = OpenAI.Responses.ResponseComputerToolCall.Drag; | |
| type ComputerUseKeypressAction = OpenAI.Responses.ResponseComputerToolCall.Keypress; | |
| type ComputerUseMoveAction = OpenAI.Responses.ResponseComputerToolCall.Move; | |
| type ComputerUseScreenshotAction = OpenAI.Responses.ResponseComputerToolCall.Screenshot; | |
| type ComputerUseScrollAction = OpenAI.Responses.ResponseComputerToolCall.Scroll; | |
| type ComputerUseTypeAction = OpenAI.Responses.ResponseComputerToolCall.Type; | |
| type ComputerUseWaitAction = OpenAI.Responses.ResponseComputerToolCall.Wait; | |
| /** | |
| * Union type of all computer use actions from OpenAI SDK. | |
| * | |
| * Derived by indexing the `action` property of | |
| * `OpenAI.Responses.ResponseComputerToolCall`, so it tracks whatever the | |
| * imported SDK version declares for that property rather than listing the | |
| * members here. | |
| */ | |
| type ComputerUseAction = OpenAI.Responses.ResponseComputerToolCall["action"]; | |
/**
 * Zod schema type for the tool's input: an object with a single `action`
 * field whose value is a union of nine object shapes, each tagged by a
 * literal `type`:
 *
 * - `screenshot`: no further fields
 * - `click` / `double_click`: `x`, `y`, and `button` (one of `back`,
 *   `forward`, `left`, `right`, `wheel`; wrapped in `ZodDefault`, the default
 *   value itself is not visible in this declaration)
 * - `drag`: `path`, an array of `{ x, y }` points
 * - `keypress`: `keys`, an array of strings
 * - `move`: `x`, `y`
 * - `scroll`: `x`, `y`, `scroll_x`, `scroll_y`
 * - `type`: `text`
 * - `wait`: optional numeric `duration` (unit not stated here)
 *
 * Used as the schema type parameter of the tool returned by `computerUse`.
 * It does not appear in this file's export list.
 */
| declare const ComputerUseActionSchema: z.ZodObject<{ | |
| action: z.ZodUnion<readonly [z.ZodObject<{ | |
| type: z.ZodLiteral<"screenshot">; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"click">; | |
| x: z.ZodNumber; | |
| y: z.ZodNumber; | |
| button: z.ZodDefault<z.ZodEnum<{ | |
| back: "back"; | |
| forward: "forward"; | |
| left: "left"; | |
| right: "right"; | |
| wheel: "wheel"; | |
| }>>; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"double_click">; | |
| x: z.ZodNumber; | |
| y: z.ZodNumber; | |
| button: z.ZodDefault<z.ZodEnum<{ | |
| back: "back"; | |
| forward: "forward"; | |
| left: "left"; | |
| right: "right"; | |
| wheel: "wheel"; | |
| }>>; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"drag">; | |
| path: z.ZodArray<z.ZodObject<{ | |
| x: z.ZodNumber; | |
| y: z.ZodNumber; | |
| }, z.core.$strip>>; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"keypress">; | |
| keys: z.ZodArray<z.ZodString>; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"move">; | |
| x: z.ZodNumber; | |
| y: z.ZodNumber; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"scroll">; | |
| x: z.ZodNumber; | |
| y: z.ZodNumber; | |
| scroll_x: z.ZodNumber; | |
| scroll_y: z.ZodNumber; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"type">; | |
| text: z.ZodString; | |
| }, z.core.$strip>, z.ZodObject<{ | |
| type: z.ZodLiteral<"wait">; | |
| duration: z.ZodOptional<z.ZodNumber>; | |
| }, z.core.$strip>]>; | |
| }, z.core.$strip>; | |
| /** | |
| * Input structure for the Computer Use tool. | |
| * The action is wrapped in an `action` property. | |
| * | |
| * The single `action` field is typed as `ComputerUseAction`. This interface | |
| * is the input type parameter of the `DynamicStructuredTool` returned by | |
| * `computerUse`. | |
| */ | |
| interface ComputerUseInput { | |
| action: ComputerUseAction; | |
| } | |
/**
 * Everything an `execute` callback is allowed to return: a `string` or a
 * `ToolMessage`, either directly or as a `Promise` of one. It is also the
 * last type parameter of the tool returned by `computerUse`.
 * It does not appear in this file's export list.
 */
| type ComputerUseReturnType = string | Promise<string> | ToolMessage<any> | Promise<ToolMessage<any>>; | |
| /** | |
| * Options for the Computer Use tool. | |
| * | |
| * All four properties (`displayWidth`, `displayHeight`, `environment`, | |
| * `execute`) are declared as required. | |
| */ | |
| interface ComputerUseOptions { | |
| /** | |
| * The width of the computer display in pixels. | |
| */ | |
| displayWidth: number; | |
| /** | |
| * The height of the computer display in pixels. | |
| */ | |
| displayHeight: number; | |
| /** | |
| * The type of computer environment to control. | |
| * - `browser`: Browser automation (recommended for most use cases) | |
| * - `mac`: macOS environment | |
| * - `windows`: Windows environment | |
| * - `linux`: Linux environment | |
| * - `ubuntu`: Ubuntu environment | |
| */ | |
| environment: ComputerUseEnvironment; | |
| /** | |
| * Execute function that handles computer action execution. | |
| * This function receives the action input and should return a base64-encoded | |
| * screenshot of the result. | |
| * | |
| * Its parameters are the `ComputerUseAction` to perform and a `ToolRuntime`. | |
| * The declared return type (`ComputerUseReturnType`) accepts a `string` or a | |
| * `ToolMessage`, returned directly or via a `Promise`. | |
| * | |
| * Declared as a required property: there is no `?` on `execute`. | |
| */ | |
| execute: (action: ComputerUseAction, runtime: ToolRuntime<any, any>) => ComputerUseReturnType; | |
| } | |
| /** | |
| * OpenAI Computer Use tool type for the Responses API. | |
| * | |
| * Alias of `OpenAI.Responses.ComputerUsePreviewTool` from the OpenAI SDK. | |
| */ | |
| type ComputerUseTool = OpenAI.Responses.ComputerUsePreviewTool; | |
| /** | |
| * Creates a Computer Use tool that allows models to control computer interfaces | |
| * and perform tasks by simulating mouse clicks, keyboard input, scrolling, and more. | |
| * | |
| * **Computer Use** is a practical application of OpenAI's Computer-Using Agent (CUA) | |
| * model (`computer-use-preview`), which combines vision capabilities with advanced | |
| * reasoning to simulate controlling computer interfaces. | |
| * | |
| * **How it works**: | |
| * The tool operates in a continuous loop: | |
| * 1. Model sends computer actions (click, type, scroll, etc.) | |
| * 2. Your code executes these actions in a controlled environment | |
| * 3. You capture a screenshot of the result | |
| * 4. Send the screenshot back to the model | |
| * 5. Repeat until the task is complete | |
| * | |
| * **Important**: Computer use is in beta and requires careful consideration: | |
| * - Use in sandboxed environments only | |
| * - Do not use for high-stakes or authenticated tasks | |
| * - Always implement human-in-the-loop for important decisions | |
| * - Handle safety checks appropriately | |
| * | |
| * @see {@link https://platform.openai.com/docs/guides/tools-computer-use | OpenAI Computer Use Documentation} | |
| * | |
| * @param options - Configuration options for the Computer Use tool: the display | |
| * size in pixels (`displayWidth`, `displayHeight`), the `environment` to | |
| * control, and the `execute` callback (see `ComputerUseOptions`) | |
| * @returns A Computer Use tool that can be passed to `bindTools`. It is typed as | |
| * a `DynamicStructuredTool` whose schema type is `ComputerUseActionSchema` and | |
| * whose input type is `ComputerUseInput` | |
| * | |
| * @example | |
| * ```typescript | |
| * import { ChatOpenAI, tools } from "@langchain/openai"; | |
| * | |
| * const model = new ChatOpenAI({ model: "computer-use-preview" }); | |
| * | |
| * // With execute callback for automatic action handling | |
| * const computer = tools.computerUse({ | |
| * displayWidth: 1024, | |
| * displayHeight: 768, | |
| * environment: "browser", | |
| * execute: async (action) => { | |
| * if (action.type === "screenshot") { | |
| * return captureScreenshot(); | |
| * } | |
| * if (action.type === "click") { | |
| * await page.mouse.click(action.x, action.y, { button: action.button }); | |
| * return captureScreenshot(); | |
| * } | |
| * if (action.type === "type") { | |
| * await page.keyboard.type(action.text); | |
| * return captureScreenshot(); | |
| * } | |
| * // Handle other actions... | |
| * return captureScreenshot(); | |
| * }, | |
| * }); | |
| * | |
| * const llmWithComputer = model.bindTools([computer]); | |
| * const response = await llmWithComputer.invoke( | |
| * "Check the latest news on bing.com" | |
| * ); | |
| * ``` | |
| * | |
| * @example | |
| * ```typescript | |
| * // Without execute callback (manual action handling) | |
| * // NOTE(review): `ComputerUseOptions` declares `execute` as required, so | |
| * // this call does not type-check against the declaration as written. | |
| * // Confirm whether `execute` is meant to be optional. | |
| * const computer = tools.computerUse({ | |
| * displayWidth: 1024, | |
| * displayHeight: 768, | |
| * environment: "browser", | |
| * }); | |
| * | |
| * const response = await model.invoke("Check the news", { | |
| * tools: [computer], | |
| * }); | |
| * | |
| * // Access the computer call from the response | |
| * const computerCall = response.additional_kwargs.tool_outputs?.find( | |
| * (output) => output.type === "computer_call" | |
| * ); | |
| * if (computerCall) { | |
| * console.log("Action to execute:", computerCall.action); | |
| * // Execute the action manually, then send back a screenshot | |
| * } | |
| * ``` | |
| * | |
| * @example | |
| * ```typescript | |
| * // For Linux desktop automation with Docker | |
| * // (xdotool and DISPLAY target an X11 display inside the container) | |
| * const computer = tools.computerUse({ | |
| * displayWidth: 1920, | |
| * displayHeight: 1080, | |
| * environment: "linux", | |
| * execute: async (action) => { | |
| * if (action.type === "click") { | |
| * await dockerExec( | |
| * `DISPLAY=:99 xdotool mousemove ${action.x} ${action.y} click 1`, | |
| * containerName | |
| * ); | |
| * } | |
| * // Capture screenshot from container | |
| * return await getDockerScreenshot(containerName); | |
| * }, | |
| * }); | |
| * ``` | |
| * | |
| * @remarks | |
| * - Only available through the Responses API (not Chat Completions) | |
| * - Requires `computer-use-preview` model | |
| * - Actions include: click, double_click, drag, keypress, move, screenshot, scroll, type, wait | |
| * - Safety checks may be returned that require acknowledgment before proceeding | |
| * - Use `truncation: "auto"` parameter when making requests | |
| * - Recommended to use with `reasoning.summary` for debugging | |
| */ | |
| declare function computerUse(options: ComputerUseOptions): DynamicStructuredTool<typeof ComputerUseActionSchema, ComputerUseInput, unknown, ComputerUseReturnType>; | |
| //#endregion | |
| export { ComputerUseAction, ComputerUseClickAction, ComputerUseDoubleClickAction, ComputerUseDragAction, ComputerUseEnvironment, ComputerUseInput, ComputerUseKeypressAction, ComputerUseMoveAction, ComputerUseOptions, ComputerUseScreenshotAction, ComputerUseScrollAction, ComputerUseTool, ComputerUseTypeAction, ComputerUseWaitAction, computerUse }; | |
| //# sourceMappingURL=computerUse.d.cts.map |