const DEFAULT_ROBOT_SYSTEM_PROMPT = `你是一个实体机器人助手。你的能力包括:- 底盘轮子:可以前进、后退、左转、右转,也可以停止或转向大致方位。- 机械臂:可以抓取物体,也可以放下、释放物体。- 视觉:可以拍照;可以根据照片识别物体,并说明其相对方位(方向与距离等)。**拍照与识别(必须分开阶段,不要「一镜定两位」)** 每次拍照只反映**当前机位、当前朝向**下的画面;报告的距离/方位只对这一刻有效。 「把 A 拿到 B」这类任务:**不要**说「拍一张照片同时认出 A 和 B 再一起导航」——在还没靠近 A、机位未变时,同一张图里对 B 的定位不可靠,后续移动后更会失效。 正确说法:先停下**拍一张、只识别 A** → 靠近并抓取 → **再拍一张、只识别 B** → 再移动与放置。A 与 B 各用至少一轮「拍照→识别」,中间可能还要在移动后再拍。**一步一事(禁止合并步骤)** 每个**编号或每条**说明里只做**一种**具体操作:例如「拍照」单独一步,「识别某物」单独一步,「左转」「前进 X 步」「抓取」「释放」各算一步。 **不要**在同一条里写「拍照并识别……并移动」这种把多动作捆在一起的话;用「第 1 步…第 2 步…」拆开,一步一句。合并步骤会导致执行顺序歧义。用户会通过对话给你下任务。请用中文清晰、**逐步单列**说明操作计划;不要编造不存在的 API 或工具调用标签。`;整个程序源代码
// ollama_langchain_chat.js// 参考 robot_chat_agent.js:仅 Ollama + LangChain 多轮对话,不注册、不绑定任何 toolsimport { ChatOllama } from "@langchain/ollama";import { AIMessage, HumanMessage, SystemMessage } from "@langchain/core/messages";import readline from "readline";const MODEL_NAME = "deepseek-v3.2:cloud";const model = new ChatOllama({ model: MODEL_NAME, temperature: 0.7, baseUrl: process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434",});/** 与 car_agent / stream 示例一致:content 可能是 string 或块数组 */function chunkText(chunk) { const c = chunk?.content; if (typeof c === "string") { return c; } if (Array.isArray(c)) { return c.map((b) => (typeof b === "string" ? b : b?.text ?? "")).join(""); } return "";}/** 默认人设:与 robot_chat_agent 一致的「轮式 + 机械臂 + 视觉」能力(本脚本仅对话,不自动执行动作) */const DEFAULT_ROBOT_SYSTEM_PROMPT = `你是一个实体机器人助手。你的能力包括:- 底盘轮子:可以前进、后退、左转、右转,也可以停止或转向大致方位。- 机械臂:可以抓取物体,也可以放下、释放物体。- 视觉:可以拍照;可以根据照片识别物体,并说明其相对方位(方向与距离等)。**拍照与识别(必须分开阶段,不要「一镜定两位」)** 每次拍照只反映**当前机位、当前朝向**下的画面;报告的距离/方位只对这一刻有效。 「把 A 拿到 B」这类任务:**不要**说「拍一张照片同时认出 A 和 B 再一起导航」——在还没靠近 A、机位未变时,同一张图里对 B 的定位不可靠,后续移动后更会失效。 正确说法:先停下**拍一张、只识别 A** → 靠近并抓取 → **再拍一张、只识别 B** → 再移动与放置。A 与 B 各用至少一轮「拍照→识别」,中间可能还要在移动后再拍。**一步一事(禁止合并步骤)** 每个**编号或每条**说明里只做**一种**具体操作:例如「拍照」单独一步,「识别某物」单独一步,「左转」「前进 X 步」「抓取」「释放」各算一步。 **不要**在同一条里写「拍照并识别……并移动」这种把多动作捆在一起的话;用「第 1 步…第 2 步…」拆开,一步一句。合并步骤会导致执行顺序歧义。用户会通过对话给你下任务。请用中文清晰、**逐步单列**说明操作计划;不要编造不存在的 API 或工具调用标签。`;const SYSTEM_PROMPT = process.env.OLLAMA_SYSTEM_PROMPT ?? DEFAULT_ROBOT_SYSTEM_PROMPT;/** @type {import("@langchain/core/messages").BaseMessage[]} */let messages = [new SystemMessage(SYSTEM_PROMPT)];const rl = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: "🧑 你: ",});console.log("\n🤖 Ollama + LangChain 纯对话(无工具)");console.log(`模型: ${MODEL_NAME}`);console.log("环境变量: OLLAMA_BASE_URL、OLLAMA_SYSTEM_PROMPT(可选)");console.log("输入 exit 退出\n");rl.prompt();rl.on("line", async (line) => { const userInput = line.trim(); if (userInput === "exit") { console.log("再见!"); rl.close(); process.exit(0); } if (userInput === "") { rl.prompt(); return; } messages.push(new HumanMessage(userInput)); process.stdout.write("\n🤖: "); let fullResponse = ""; try { const stream = await model.stream(messages); for await (const chunk of stream) { const text = chunkText(chunk); if (text) { process.stdout.write(text); fullResponse += text; } } } catch (err) { console.error("\n请求失败:", err.message ?? err); messages.pop(); console.log(""); rl.prompt(); return; } console.log("\n"); messages.push(new AIMessage(fullResponse)); rl.prompt();});ollama
安装 cloud
ollama pull deepseek-v3.2:cloudollama pull gemini-3-flash-preview:cloud这个级别的免费使用相当可以了。
把任务分解了,先用plan模式也就chat模式做一个plan,然后用任务执行模式去执行。发现准确多了
// ollama_langchain_plan_then_execute.js// 两阶段:① 纯 Chat(无 tools)输出**普通文字**计划;② 把原文交给带 tools 的 exec 模型自行理解并调用工具(不解析成 steps)//// 用法:// node examples/ollama_langchain_plan_then_execute.js// PLAN_ONLY=1 node ... # 只生成计划,不执行//// Gemini 3(如 gemini-3-flash-preview:cloud)经原生 Ollama API 做多轮 tool 时可能 400:// Function call is missing a thought_signature// 本脚本对**执行阶段**在检测到 gemini-3 或设置 OLLAMA_OPENAI_V1=1 时,自动改用// ChatOpenAI + OLLAMA_BASE_URL 的 OpenAI 兼容路径(…/v1)。可 OLLAMA_OPENAI_V1=0 强制用 ChatOllama。import { ChatOllama } from "@langchain/ollama";import { ChatOpenAI } from "@langchain/openai";import { AIMessage, HumanMessage, SystemMessage, isAIMessage } from "@langchain/core/messages";import { tool } from "@langchain/core/tools";import { createReactAgent } from "@langchain/langgraph/prebuilt";import { MemorySaver } from "@langchain/langgraph";import { randomUUID } from "node:crypto";import { z } from "zod";import fs from "fs/promises";import readline from "readline";import path from "path";//const MODEL_NAME = "deepseek-v3.2:cloud";const MODEL_NAME = "gemini-3-flash-preview:cloud";const BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434";const OLLAMA_OPENAI_V1 = process.env.OLLAMA_OPENAI_V1;/** 计划阶段:默认与 ollama_langchain_chat 相同用法(deepseek-v3.2:cloud + temperature 0.7);可 PLAN_MODEL 覆盖 */const PLAN_MODEL = MODEL_NAME;/** 仅当设置正整数时生效,对应 Ollama numPredict;不设则与纯 chat 一致不限制 */const PLAN_NUM_PREDICT = Number.parseInt(process.env.PLAN_NUM_PREDICT ?? "", 10);const PLAN_ONLY = process.env.PLAN_ONLY === "1" || process.env.PLAN_ONLY === "true";const RECURSION_LIMIT = Math.max( 25, Number.parseInt(process.env.LANGGRAPH_RECURSION_LIMIT ?? "", 10) || 64,);function ollamaOpenAiCompatBaseUrl(baseUrl) { const u = String(baseUrl ?? "").replace(/\/+$/, ""); return u || "http://127.0.0.1:11434";}/** Gemini 3 :cloud + 原生 /api/chat 会丢 thought_signature;经 /v1 可走通 tool 多轮 */const useOpenAiV1ForExec = OLLAMA_OPENAI_V1 === "0" || OLLAMA_OPENAI_V1 === "false" ? false : OLLAMA_OPENAI_V1 === "1" || OLLAMA_OPENAI_V1 === "true" ? true : /gemini-3/i.test(MODEL_NAME);const openAiCompatBaseUrl = `${ollamaOpenAiCompatBaseUrl(BASE_URL)}/v1`;/** 分解任务:与 ollama_langchain_chat.js 同构(仅 model/temperature/baseUrl,不绑工具);单次 stream */const planModel = new ChatOllama({ model: PLAN_MODEL, temperature: 0.7, baseUrl: BASE_URL, ...(Number.isFinite(PLAN_NUM_PREDICT) && PLAN_NUM_PREDICT > 0 ? { numPredict: PLAN_NUM_PREDICT } : {}),});/** 执行计划用(ReAct):Gemini 3 用 ChatOpenAI→Ollama /v1,其余用 ChatOllama */const execBaseModel = useOpenAiV1ForExec ? new ChatOpenAI({ model: MODEL_NAME, temperature: 0.2, apiKey: process.env.OPENAI_API_KEY ?? "ollama", configuration: { baseURL: openAiCompatBaseUrl }, }) : new ChatOllama({ model: MODEL_NAME, temperature: 0.2, baseUrl: BASE_URL, });// ==================== 硬件模拟(与 robot_agent 一致) ====================async function moveForward(steps = 1) { console.log(`🚗 小车向前移动 ${steps} 步`); return `向前移动了 ${steps} 步`;}async function moveBackward(steps = 1) { console.log(`🚗 小车向后移动 ${steps} 步`); return `向后移动了 ${steps} 步`;}async function turnLeft() { console.log(`🚗 小车左转`); return `向左转`;}async function turnRight() { console.log(`🚗 小车右转`); return `向右转`;}async function stop() { console.log(`🚗 小车停止`); return `停止`;}async function graspObject() { console.log(`🦾 机械臂抓取`); return `已抓取`;}async function releaseObject() { console.log(`🦾 机械臂释放`); return `已释放`;}async function takePhoto() { const timestamp = Date.now(); const filename = `photo_${timestamp}.jpg`; const filepath = path.join("./photos", filename); await fs.mkdir("./photos", { recursive: true }); await fs.writeFile(filepath, `模拟照片 ${timestamp}`); console.log(`📸 拍照保存至 ${filepath}`); return filepath;}function pickTargetObjectFromArgs(args) { if (args == null || typeof args !== "object") { return null; } const direct = args.targetObject ?? args.target_object ?? args.object ?? args.name; if (direct != null && String(direct).trim()) { return String(direct).trim(); } const inp = args.input; if (inp == null) { return null; } try { const parsed = typeof inp === "string" ? JSON.parse(inp) : inp; if (parsed && typeof parsed === "object") { const t = parsed.targetObject ?? parsed.target_object ?? parsed.object; if (t != null && String(t).trim()) { return String(t).trim(); } } } catch { /* ignore */ } return null;}function pickImagePathFromArgs(args) { if (args == null || typeof args !== "object") { return null; } const direct = args.imagePath ?? args.image_path ?? args.photoPath ?? args.path; if (direct != null && String(direct).trim()) { return String(direct).trim(); } const inp = args.input; if (inp == null) { return null; } try { const parsed = typeof inp === "string" ? JSON.parse(inp) : inp; if (parsed && typeof parsed === "object") { const p = parsed.imagePath ?? parsed.image_path ?? parsed.photoPath ?? parsed.path; if (p != null && String(p).trim()) { return String(p).trim(); } } } catch { /* ignore */ } return null;}async function recognizeObjectFromPhoto(photoPath, targetObject = null) { const wanted = targetObject && String(targetObject).trim(); console.log(`🔍 识别照片: ${photoPath}`); let objectName; if (wanted) { objectName = wanted; } else { const pool = ["杯子", "手机", "书", "金戒指", "首饰收纳盒", "黄色衣服", "绿色篮子"]; objectName = pool[Math.floor(Math.random() * pool.length)]; } const distance = (Math.random() * 4.5 + 0.5).toFixed(1); const angle = Math.floor(Math.random() * 121) - 60; const direction = angle === 0 ? "正前方" : angle < 0 ? `左前方 ${-angle} 度` : `右前方 ${angle} 度`; const result = `识别到:${objectName},位于 ${direction},距离 ${distance} 米`; console.log(`🎯 ${result}`); return result;}const chassisMoveTool = tool( async ({ action, steps }) => { const s = steps ?? 1; switch (action) { case "forward": return moveForward(s); case "backward": return moveBackward(s); case "turn_left": return turnLeft(); case "turn_right": return turnRight(); case "stop": return stop(); default: return `未知底盘动作: ${action}`; } }, { name: "chassis_move", description: "底盘:forward / backward / turn_left / turn_right / stop;forward、backward 可带 steps。", schema: z.object({ action: z.enum(["forward", "backward", "turn_left", "turn_right", "stop"]), steps: z.number().optional(), }), },);const armGripTool = tool( async ({ action }) => { return action === "grasp" ? graspObject() : releaseObject(); }, { name: "arm_grip", description: "机械臂 grasp 或 release", schema: z.object({ action: z.enum(["grasp", "release"]) }), },);const takePhotoTool = tool(async () => takePhoto(), { name: "take_photo", description: "拍照,返回路径", schema: z.object({}),});const detectObjectTool = tool( async (args) => { const imagePath = pickImagePathFromArgs(args); if (!imagePath) { return "缺少 imagePath,请先 take_photo。"; } const target = pickTargetObjectFromArgs(args); return recognizeObjectFromPhoto(imagePath, target); }, { name: "detect_object", description: "识别:需要 imagePath 与可选 targetObject", schema: z.object({ imagePath: z.string(), targetObject: z.string().optional(), input: z.string().optional(), }), },);const tools = [chassisMoveTool, armGripTool, takePhotoTool, detectObjectTool];// ==================== XML 补丁(ReAct 用) ====================function aiMessageTextContent(msg) { const c = msg?.content; if (typeof c === "string") { return c; } if (Array.isArray(c)) { return c.map((b) => (typeof b === "string" ? b : b?.text ?? "")).join(""); } return "";}function parseInvokeBodyArgs(body) { const raw = body ?? ""; const trimmed = raw.trim(); const fromTags = {}; const paramRe = /<parameter\s+name="([^"]+)"[^>]*>([\s\S]*?)<\/parameter>/gi; let pm; while ((pm = paramRe.exec(raw)) !== null) { fromTags[pm[1]] = pm[2].trim(); } if (trimmed.startsWith("{")) { try { return { ...fromTags, ...JSON.parse(trimmed) }; } catch { return fromTags; } } return fromTags;}function parseXmlInvokeToolCalls(text, allowedNames) { const calls = []; const re = /<invoke\s+name="([^"]+)"(?:\s*\/>|>([\s\S]*?)<\/invoke>)/gi; let m; while ((m = re.exec(text)) !== null) { const name = m[1].trim(); if (!allowedNames.has(name)) { continue; } const body = m[2] !== undefined ? m[2] : ""; calls.push({ name, args: parseInvokeBodyArgs(body) }); } return calls;}function stripFunctionCallsXml(text) { return text.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi, "").trim();}function patchAiMessageIfXmlToolCalls(lastMessage, allowedNames) { if (!isAIMessage(lastMessage) || (lastMessage.tool_calls?.length ?? 0) > 0) { return null; } const text = aiMessageTextContent(lastMessage); if (!text.includes("<invoke") || !text.includes("name=")) { return null; } const parsed = parseXmlInvokeToolCalls(text, allowedNames); if (parsed.length === 0) { return null; } const visible = stripFunctionCallsXml(text) .replace(/<invoke\s+name="[^"]+"(?:\s*\/>|>[\s\S]*?<\/invoke>)/gi, "") .trim(); return new AIMessage({ id: lastMessage.id, content: visible, tool_calls: parsed.map((p) => ({ name: p.name, args: p.args, id: `call_${randomUUID().replace(/-/g, "").slice(0, 24)}`, })), });}async function postModelXmlToolFix(state) { const allowed = new Set(tools.map((t) => t.name)); const { messages } = state; const last = messages[messages.length - 1]; const patched = patchAiMessageIfXmlToolCalls(last, allowed); return patched ? { messages: [patched] } : {};}// ==================== ① Chat 分解计划(仅普通文字,不解析) ====================const PLAN_SYSTEM = `你是一个实体机器人助手(**本对话模式不连接真实硬件、也不会调用任何工具**)。你的设定能力包括:底盘轮子前后左右与停止、机械臂抓取与放下、拍照、根据照片识别物体与方位——请只用**自然语言**与用户交流:回答问题、讲解步骤、做计划或教学。**本模式禁止**:不要输出工具调用、不要写 XML/function_calls、不要假装已经拍照或识别(除非用户是在做思想实验,应说明「若真实机器人上会…」)。**拍照与识别(规划时要讲清)** 分阶段拍照,一镜不要同时定两个目标的精确导航位;「把 A 放到 B」要先针对 A 再针对 B。**一步一事** 说明步骤时每条只写一种动作,不要「拍照并识别并移动」捆成一条。用户会通过对话给你下任务;你以聊天方式回复即可。`;function stripCodeFences(s) { const m = s.match(/```(?:\w*)?\s*([\s\S]*?)```/); if (m) { return m[1].trim(); } return s.trim();}/** 与 ollama_langchain_chat.js 的 chunkText 一致 */function planStreamChunkText(chunk) { const c = chunk?.content; if (typeof c === "string") { return c; } if (Array.isArray(c)) { return c.map((b) => (typeof b === "string" ? b : b?.text ?? "")).join(""); } return "";}async function generatePlan(userTask) { console.log("⏳ 正在分解任务…"); process.stdout.write("\n🤖: "); let text = ""; const stream = await planModel.stream([ new SystemMessage(PLAN_SYSTEM), new HumanMessage(userTask), ]); for await (const chunk of stream) { const piece = planStreamChunkText(chunk); if (piece) { process.stdout.write(piece); text += piece; } } console.log("\n"); const planText = stripCodeFences(text); if (!planText) { throw new Error("计划阶段未产生文字内容"); } console.log("✅ 计划已生成(纯文本,将原样交给执行模型)。"); return planText;}// ==================== ② ReAct 执行(阅读文字计划 + 工具) ====================const EXEC_SYSTEM = `你是执行阶段的机器人控制代理。你会收到「用户原始任务」和一段「规划阶段输出的文字计划」。- 根据文字计划,**仅通过工具**落实:take_photo、detect_object、chassis_move、arm_grip- detect_object 的 imagePath 必须是**最近一次 take_photo 返回的完整文件路径**(不要编造路径)- 按计划的逻辑顺序调用工具;若计划含糊,用最少合理步骤补全- 每次尽量一次一个工具调用;用标准函数调用,不要写 XML 伪调用`;const checkpointer = new MemorySaver();const execAgent = createReactAgent({ llm: execBaseModel.bindTools(tools), tools, checkpointer, postModelHook: postModelXmlToolFix, systemMessage: EXEC_SYSTEM,});function createFunctionCallsStreamFilter() { const OPEN = "<function_calls>"; const CLOSE = "</function_calls>"; let buf = ""; let inXml = false; function holdbackSuffix(s, token) { const sl = s.toLowerCase(); const tl = token.toLowerCase(); const max = Math.min(s.length, token.length - 1); for (let i = max; i >= 1; i--) { if (tl.startsWith(sl.slice(-i))) { return i; } } return 0; } return { push(raw) { if (raw == null || raw === "") { return ""; } buf += String(raw); let out = ""; while (buf.length > 0) { if (!inXml) { const i = buf.toLowerCase().indexOf(OPEN); if (i === -1) { const hb = holdbackSuffix(buf, OPEN); out += buf.slice(0, buf.length - hb); buf = buf.slice(buf.length - hb); break; } out += buf.slice(0, i); buf = buf.slice(i + OPEN.length); inXml = true; continue; } const j = buf.toLowerCase().indexOf(CLOSE); if (j === -1) { const hb = holdbackSuffix(buf, CLOSE); buf = buf.slice(0, buf.length - hb); break; } buf = buf.slice(j + CLOSE.length); inXml = false; } return out; }, flush() { if (inXml) { buf = ""; inXml = false; return ""; } const hb = holdbackSuffix(buf, OPEN); const emit = buf.slice(0, buf.length - hb); buf = buf.slice(buf.length - hb); return emit; }, };}async function executePlanWithAgent(userTask, planText) { console.log("⏳ LangGraph ReAct 执行中(阅读文字计划并调用工具)…"); const prompt = `用户原始任务:\n${userTask}\n\n` + `已经为您分解好的任务计划(请理解并按序用工具落实;detect_object 时使用上一轮 take_photo 返回的完整路径):\n${planText}`; const config = { version: "v2", configurable: { thread_id: `plan-exec-${Date.now()}` }, recursionLimit: RECURSION_LIMIT, }; const xmlStreamFilter = createFunctionCallsStreamFilter(); try { const stream = await execAgent.streamEvents( { messages: [new HumanMessage(prompt)] }, config, ); for await (const event of stream) { if (event.event === "on_chat_model_stream") { const chunk = event.data?.chunk?.content; const pieces = Array.isArray(chunk) ? chunk.map((b) => (typeof b === "string" ? b : b?.text ?? "")) : typeof chunk === "string" ? [chunk] : []; for (const p of pieces) { const visible = xmlStreamFilter.push(p); if (visible) { process.stdout.write(visible); } } } if (event.event === "on_tool_start") { console.log(`\n🔧 ${event.name} ${JSON.stringify(event.data.input)}`); } if (event.event === "on_tool_end") { const out = event.data?.output; let text; if (typeof out === "string") { text = out; } else if (out && typeof out === "object") { const c = out.content ?? out.kwargs?.content; text = typeof c === "string" ? c : Array.isArray(c) ? c.map((b) => b?.text ?? "").join("") : out.text ?? JSON.stringify(out); } else { text = String(out); } console.log(`✅ ${text}`); } } } catch (err) { if (err?.lc_error_code === "GRAPH_RECURSION_LIMIT") { console.error( `\n⚠️ 图递归上限 ${RECURSION_LIMIT} 仍不够(或模型在空转)。` + "可提高 LANGGRAPH_RECURSION_LIMIT,或缩短单次任务。", ); } else { throw err; } } const tail = xmlStreamFilter.flush(); if (tail) { process.stdout.write(tail); } console.log("\n");}// ==================== REPL ====================const rl = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: "🧑 任务: ",});console.log("\n两阶段:① Chat 输出**普通文字**计划 ② 同一原文交给 exec ReAct,由模型理解并调工具");console.log(`执行阶段模型: ${MODEL_NAME}`);console.log( useOpenAiV1ForExec ? `执行阶段客户端: ChatOpenAI(OpenAI 兼容端点 ${openAiCompatBaseUrl},规避 Gemini 3 thought_signature 400)` : `执行阶段客户端: ChatOllama(${BASE_URL})`,);console.log( `计划阶段: 与 ollama_langchain_chat 相同 ChatOllama 用法(temperature=0.7,默认不限制 numPredict)。模型=${PLAN_MODEL}${ Number.isFinite(PLAN_NUM_PREDICT) && PLAN_NUM_PREDICT > 0 ? `;numPredict=${PLAN_NUM_PREDICT}` : "" }`,);console.log("说明: 计划阶段**不解析**为 steps;执行阶段用 take_photo / detect_object / chassis_move / arm_grip。");console.log( "环境变量: OLLAMA_BASE_URL、OLLAMA_OPENAI_V1(1/0 强制开关)、PLAN_ONLY、PLAN_NUM_PREDICT、LANGGRAPH_RECURSION_LIMIT",);console.log("执行方式: 始终 LangGraph ReAct;PLAN_ONLY=1 只生成计划并打印原文,跳过执行。");console.log("输入 exit 退出");console.log("提示: ① 阶段若长时间无输出,请检查 OLLAMA_BASE_URL、网络与云端模型是否可用。\n");rl.prompt();rl.on("line", async (line) => { const userTask = line.trim(); if (userTask === "exit") { rl.close(); process.exit(0); } if (!userTask) { rl.prompt(); return; } try { console.log("\n━━ ① 正在分解任务 ━━"); const planText = await generatePlan(userTask); if (PLAN_ONLY) { console.log("\n── 计划原文 ──\n" + planText + "\n"); console.log("\n(PLAN_ONLY=1,跳过执行)\n"); rl.prompt(); return; } console.log("\n━━ ② 执行工具 ━━"); await executePlanWithAgent(userTask, planText); console.log("\n✅ 本轮结束\n"); } catch (e) { console.error("❌", e.message ?? e); } rl.prompt();});这一次进化成了 分步骤执行
不是将完整的计划塞给执行 Agent,而是分步骤给执行Agent保证执行分解的准确性。
// ollama_langchain_plan_then_execute.js// 两阶段**自动衔接**(中间无需人工确认):① **正在分解任务**(Plan:无工具,流式文字计划)→ ② **正在执行任务**(Build:LangGraph ReAct + 模拟工具)。// Build 将计划中「1. 2. …」解析为步骤数组,按 EXEC_BATCH_SIZE(默认 3)分批执行。// Plan / Build 可用同一模型,也可 PLAN_MODEL≠EXEC_MODEL。//// 用法:// node examples/ollama_langchain_plan_then_execute.js// PLAN_ONLY=1 node ... # 只做①分解任务,不进入②// BUILD_CONFIRM=1 node ... # ①与②之间增加 run/replan/skip 确认(调试用)// EXEC_BATCH_SIZE=4 node ... # 每批最多 4 条规划步骤(1–10)// PLAN_MODEL=m1 EXEC_MODEL=m2 ...# 两阶段各指定模型// (旧 AUTO_EXEC=1 已无必要,默认即①后自动②;需要中间停一下时用 BUILD_CONFIRM=1。)//// Gemini 3(如 gemini-3-flash-preview:cloud)经原生 Ollama API 做多轮 tool 时可能 400:// Function call is missing a thought_signature// 本脚本对**执行阶段**在检测到 gemini-3 或设置 OLLAMA_OPENAI_V1=1 时,自动改用// ChatOpenAI + OLLAMA_BASE_URL 的 OpenAI 兼容路径(…/v1)。可 OLLAMA_OPENAI_V1=0 强制用 ChatOllama。//// 经 Ollama /v1 转 Gemini 时,若 ToolMessage 未带 name,可能 400:// function_response.name: Name cannot be empty// 故在 preModelHook 里按 tool_call_id 从历史上 AI tool_calls 补全 name。import { ChatOllama } from "@langchain/ollama";import { ChatOpenAI } from "@langchain/openai";import { AIMessage, HumanMessage, SystemMessage, ToolMessage, isAIMessage, isToolMessage,} from "@langchain/core/messages";import { tool } from "@langchain/core/tools";import { createReactAgent } from "@langchain/langgraph/prebuilt";import { MemorySaver } from "@langchain/langgraph";import { randomUUID } from "node:crypto";import { z } from "zod";import fs from "fs/promises";import readline from "readline";import path from "path";//const MODEL_NAME = "deepseek-v3.2:cloud";const MODEL_NAME = "gemini-3-flash-preview:cloud";const BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434";const OLLAMA_OPENAI_V1 = process.env.OLLAMA_OPENAI_V1;/** ① 分解任务(Plan):无工具 */const PLAN_MODEL = process.env.PLAN_MODEL ?? MODEL_NAME;/** ② 执行任务(Build):带工具 ReAct */const EXEC_MODEL = process.env.EXEC_MODEL ?? MODEL_NAME;/** 仅当设置正整数时生效,对应 Ollama numPredict;不设则与纯 chat 一致不限制 */const PLAN_NUM_PREDICT = Number.parseInt(process.env.PLAN_NUM_PREDICT ?? "", 10);const PLAN_ONLY = process.env.PLAN_ONLY === "1" || process.env.PLAN_ONLY === "true";/** 为 true 时 Plan 结束后暂停,人工输入 run/replan/skip(默认 false,①②自动连续) */const BUILD_CONFIRM = process.env.BUILD_CONFIRM === "1" || process.env.BUILD_CONFIRM === "true";/** 从计划中切分的每条为一批内最多几条(1–10,默认 3) */const EXEC_BATCH_SIZE_RAW = Number.parseInt(process.env.EXEC_BATCH_SIZE ?? "3", 10);const EXEC_BATCH_SIZE = Number.isFinite(EXEC_BATCH_SIZE_RAW) ? Math.min(10, Math.max(1, EXEC_BATCH_SIZE_RAW)) : 3;const RECURSION_LIMIT = Math.max( 25, Number.parseInt(process.env.LANGGRAPH_RECURSION_LIMIT ?? "", 10) || 64,);function ollamaOpenAiCompatBaseUrl(baseUrl) { const u = String(baseUrl ?? "").replace(/\/+$/, ""); return u || "http://127.0.0.1:11434";}/** Gemini 3 :cloud + 原生 /api/chat 会丢 thought_signature;经 /v1 可走通 tool 多轮(按 EXEC_MODEL 判断) */const useOpenAiV1ForExec = OLLAMA_OPENAI_V1 === "0" || OLLAMA_OPENAI_V1 === "false" ? false : OLLAMA_OPENAI_V1 === "1" || OLLAMA_OPENAI_V1 === "true" ? true : /gemini-3/i.test(EXEC_MODEL);const openAiCompatBaseUrl = `${ollamaOpenAiCompatBaseUrl(BASE_URL)}/v1`;/** ① 与 ollama_langchain_chat.js 同构(无工具);单次 stream */const planModel = new ChatOllama({ model: PLAN_MODEL, temperature: 0.7, baseUrl: BASE_URL, ...(Number.isFinite(PLAN_NUM_PREDICT) && PLAN_NUM_PREDICT > 0 ? { numPredict: PLAN_NUM_PREDICT } : {}),});/** ② Build(ReAct):Gemini 3 用 ChatOpenAI→Ollama /v1,其余用 ChatOllama */const execBaseModel = useOpenAiV1ForExec ? new ChatOpenAI({ model: EXEC_MODEL, temperature: 0.2, apiKey: process.env.OPENAI_API_KEY ?? "ollama", configuration: { baseURL: openAiCompatBaseUrl }, }) : new ChatOllama({ model: EXEC_MODEL, temperature: 0.2, baseUrl: BASE_URL, });// ==================== 硬件模拟(与 robot_agent 一致) ====================async function moveForward(steps = 1) { console.log(`🚗 小车向前移动 ${steps} 步`); return `向前移动了 ${steps} 步`;}async function moveBackward(steps = 1) { console.log(`🚗 小车向后移动 ${steps} 步`); return `向后移动了 ${steps} 步`;}async function turnLeft() { console.log(`🚗 小车左转`); return `向左转`;}async function turnRight() { console.log(`🚗 小车右转`); return `向右转`;}async function stop() { console.log(`🚗 小车停止`); return `停止`;}/** * 单条复合导航:先按角度转向,再直线前进若干步(底层移动模块会按此顺序执行)。 * angle_degrees:正=右转、负=左转(度),可与 detect「右前方 X° / 左前方 X°」对应;约 0 则只前进。 */async function turnThenForward(angleDegrees, forwardSteps) { const ang = Number(angleDegrees); const steps = Math.max(1, Math.floor(Number(forwardSteps)) || 1); const parts = []; if (Number.isFinite(ang) && Math.abs(ang) >= 0.01) { if (ang > 0) { console.log(`🚗 [复合移动] 先右转 ${ang}°`); parts.push(`右转 ${ang}°`); } else { console.log(`🚗 [复合移动] 先左转 ${Math.abs(ang)}°`); parts.push(`左转 ${Math.abs(ang)}°`); } } const fwd = await moveForward(steps); parts.push(`前进 ${steps} 步`); return `已执行:${parts.join(",")}(${fwd})`;}async function graspObject() { console.log(`🦾 机械臂抓取`); return `已抓取`;}async function releaseObject() { console.log(`🦾 机械臂释放`); return `已释放`;}async function takePhoto() { const timestamp = Date.now(); const filename = `photo_${timestamp}.jpg`; const filepath = path.join("./photos", filename); await fs.mkdir("./photos", { recursive: true }); await fs.writeFile(filepath, `模拟照片 ${timestamp}`); console.log(`📸 拍照保存至 ${filepath}`); return filepath;}function pickTargetObjectFromArgs(args) { if (args == null || typeof args !== "object") { return null; } const direct = args.targetObject ?? args.target_object ?? args.object ?? args.name; if (direct != null && String(direct).trim()) { return String(direct).trim(); } const inp = args.input; if (inp == null) { return null; } try { const parsed = typeof inp === "string" ? JSON.parse(inp) : inp; if (parsed && typeof parsed === "object") { const t = parsed.targetObject ?? parsed.target_object ?? parsed.object; if (t != null && String(t).trim()) { return String(t).trim(); } } } catch { /* ignore */ } return null;}function pickImagePathFromArgs(args) { if (args == null || typeof args !== "object") { return null; } const direct = args.imagePath ?? args.image_path ?? args.photoPath ?? args.path; if (direct != null && String(direct).trim()) { return String(direct).trim(); } const inp = args.input; if (inp == null) { return null; } try { const parsed = typeof inp === "string" ? JSON.parse(inp) : inp; if (parsed && typeof parsed === "object") { const p = parsed.imagePath ?? parsed.image_path ?? parsed.photoPath ?? parsed.path; if (p != null && String(p).trim()) { return String(p).trim(); } } } catch { /* ignore */ } return null;}async function recognizeObjectFromPhoto(photoPath, targetObject = null) { const wanted = targetObject && String(targetObject).trim(); console.log(`🔍 识别照片: ${photoPath}`); let objectName; if (wanted) { objectName = wanted; } else { const pool = ["杯子", "手机", "书", "金戒指", "首饰收纳盒", "黄色衣服", "绿色篮子"]; objectName = pool[Math.floor(Math.random() * pool.length)]; } const distance = (Math.random() * 4.5 + 0.5).toFixed(1); const angle = Math.floor(Math.random() * 121) - 60; const direction = angle === 0 ? "正前方" : angle < 0 ? `左前方 ${-angle} 度` : `右前方 ${angle} 度`; const result = `识别到:${objectName},位于 ${direction},距离 ${distance} 米`; console.log(`🎯 ${result}`); return result;}const chassisMoveSchema = z .object({ action: z.enum([ "forward", "backward", "turn_left", "turn_right", "stop", "turn_then_forward", ]), steps: z .number() .optional() .describe("forward/backward/turn_then_forward 的前进步数;turn_then_forward 默认 1"), angle_degrees: z .number() .optional() .describe( "仅 turn_then_forward:相对转角,正=右转、负=左转(度),如识别结果「右前方 35 度」可取 35", ), }) .refine( (d) => d.action !== "turn_then_forward" || (d.angle_degrees !== undefined && d.angle_degrees !== null && Number.isFinite(Number(d.angle_degrees))), { message: "turn_then_forward 必须提供 angle_degrees(数字,度)", path: ["angle_degrees"] }, );const chassisMoveTool = tool( async ({ action, steps, angle_degrees }) => { const s = steps ?? 1; switch (action) { case "forward": return moveForward(s); case "backward": return moveBackward(s); case "turn_left": return turnLeft(); case "turn_right": return turnRight(); case "stop": return stop(); case "turn_then_forward": return turnThenForward(angle_degrees, s); default: return `未知底盘动作: ${action}`; } }, { name: "chassis_move", description: "底盘:forward / backward / turn_left / turn_right / stop;" + "**turn_then_forward** 一条指令同时带转角与前进步数:底层**先转弯再前进**(angle_degrees 正=右转、负=左转;steps=前进步数)。靠近目标时可优先用 turn_then_forward 减少调用次数。", schema: chassisMoveSchema, },);const armGripTool = tool( async ({ action }) => { return action === "grasp" ? graspObject() : releaseObject(); }, { name: "arm_grip", description: "机械臂 grasp 或 release", schema: z.object({ action: z.enum(["grasp", "release"]) }), },);const takePhotoTool = tool(async () => takePhoto(), { name: "take_photo", description: "拍照,返回路径", schema: z.object({}),});const detectObjectTool = tool( async (args) => { const imagePath = pickImagePathFromArgs(args); if (!imagePath) { return "缺少 imagePath,请先 take_photo。"; } const target = pickTargetObjectFromArgs(args); return recognizeObjectFromPhoto(imagePath, target); }, { name: "detect_object", description: "识别:需要 imagePath 与可选 targetObject。若结果含「左/右前方 X 度、距离 Y 米」,可用 chassis_move turn_then_forward(angle_degrees≈±X, steps 按距离估) 一次完成转向+靠近。", schema: z.object({ imagePath: z.string(), targetObject: z.string().optional(), input: z.string().optional(), }), },);const tools = [chassisMoveTool, armGripTool, takePhotoTool, detectObjectTool];// ==================== XML 补丁(ReAct 用) ====================function aiMessageTextContent(msg) { const c = msg?.content; if (typeof c === "string") { return c; } if (Array.isArray(c)) { return c.map((b) => (typeof b === "string" ? b : b?.text ?? "")).join(""); } return "";}function parseInvokeBodyArgs(body) { const raw = body ?? ""; const trimmed = raw.trim(); const fromTags = {}; const paramRe = /<parameter\s+name="([^"]+)"[^>]*>([\s\S]*?)<\/parameter>/gi; let pm; while ((pm = paramRe.exec(raw)) !== null) { fromTags[pm[1]] = pm[2].trim(); } if (trimmed.startsWith("{")) { try { return { ...fromTags, ...JSON.parse(trimmed) }; } catch { return fromTags; } } return fromTags;}function parseXmlInvokeToolCalls(text, allowedNames) { const calls = []; const re = /<invoke\s+name="([^"]+)"(?:\s*\/>|>([\s\S]*?)<\/invoke>)/gi; let m; while ((m = re.exec(text)) !== null) { const name = m[1].trim(); if (!allowedNames.has(name)) { continue; } const body = m[2] !== undefined ? m[2] : ""; calls.push({ name, args: parseInvokeBodyArgs(body) }); } return calls;}function stripFunctionCallsXml(text) { return text.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi, "").trim();}function patchAiMessageIfXmlToolCalls(lastMessage, allowedNames) { if (!isAIMessage(lastMessage) || (lastMessage.tool_calls?.length ?? 0) > 0) { return null; } const text = aiMessageTextContent(lastMessage); if (!text.includes("<invoke") || !text.includes("name=")) { return null; } const parsed = parseXmlInvokeToolCalls(text, allowedNames); if (parsed.length === 0) { return null; } const visible = stripFunctionCallsXml(text) .replace(/<invoke\s+name="[^"]+"(?:\s*\/>|>[\s\S]*?<\/invoke>)/gi, "") .trim(); return new AIMessage({ id: lastMessage.id, content: visible, tool_calls: parsed.map((p) => ({ name: p.name, args: p.args, id: `call_${randomUUID().replace(/-/g, "").slice(0, 24)}`, })), });}async function postModelXmlToolFix(state) { const allowed = new Set(tools.map((t) => t.name)); const { messages } = state; const last = messages[messages.length - 1]; const patched = patchAiMessageIfXmlToolCalls(last, allowed); return patched ? { messages: [patched] } : {};}/** OpenAI/Gemini 等格式下 tool_call 的 name 可能在 function.name;供 Ollama→Gemini 补全 ToolMessage.name */function toolCallEntryName(tc) { if (tc == null || typeof tc !== "object") { return null; } let name = tc.name; if ((name == null || String(name).trim() === "") && tc.function != null) { name = tc.function.name; } if (name == null || String(name).trim() === "") { return null; } return String(name).trim();}/** * Gemini 要求每条 function_response 带非空 name;LangChain 发 OpenAI「tool」消息时若缺 name, * 仅 tool_call_id 在经 Ollama 转译时可能丢函数名。每次进模型前按 id 从历史上 AI 的 tool_calls 补上。 */async function preModelEnsureToolMessageNames(state) { const { messages } = state; if (!Array.isArray(messages)) { return { llmInputMessages: [] }; } const idToName = new Map(); for (const msg of messages) { if (!isAIMessage(msg) || !msg.tool_calls?.length) { continue; } for (const tc of msg.tool_calls) { const id = tc?.id; const nm = toolCallEntryName(tc); if (id != null && nm) { idToName.set(String(id), nm); } } } /** 每轮都必须设置 llmInputMessages,否则会沿用上一轮 hook 的缓存 */ const llmInputMessages = messages.map((msg) => { if (!isToolMessage(msg)) { return msg; } const existing = msg.name; if (existing != null && String(existing).trim() !== "") { return msg; } const resolved = idToName.get(String(msg.tool_call_id ?? "")); if (!resolved) { return msg; } return new ToolMessage({ content: msg.content, tool_call_id: msg.tool_call_id, name: resolved, status: msg.status, metadata: msg.metadata, }); }); return { llmInputMessages };}// ==================== ① Chat 分解计划(仅普通文字,不解析) ====================const PLAN_SYSTEM = `你是一个实体机器人助手(**本对话模式不连接真实硬件、也不会调用任何工具**)。你的设定能力包括:底盘(可一条指令「先转角度再前进」)、机械臂抓取与放下、拍照、根据照片识别物体与方位——请只用**自然语言**与用户交流:回答问题、讲解步骤、做计划或教学。**本模式禁止**:不要输出工具调用、不要写 XML/function_calls、不要假装已经拍照或识别(除非用户是在做思想实验,应说明「若真实机器人上会…」)。**拍照与识别(规划时要讲清)** 分阶段拍照,一镜不要同时定两个目标的精确导航位;「把 A 放到 B」要先针对 A 再针对 B。**一步一事** 说明步骤时每条只写一种动作,不要「拍照并识别并移动」捆成一条。**规划动作必须带序号** 凡是要执行的先后顺序,必须用**阿拉伯数字 + 点或顿号**分行列出(1. 2. 3.…),便于与后续真实执行对照;每行一步,例如:1. 拍照 2. 识别某某 3. 底盘前进。 可先有简短开场,但**动作步骤本身从 1 起连续编号**;尽量不要用 markdown 代码围栏包住整段计划(若用了围栏,围栏内仍须保留 1. 2. 3. 格式)。用户会通过对话给你下任务;你以聊天方式回复即可。`;function stripCodeFences(s) { const m = s.match(/```(?:\w*)?\s*([\s\S]*?)```/); if (m) { return m[1].trim(); } return s.trim();}/** * 从 Plan 正文中抽出编号步骤,返回字符串数组(保留「序号. 正文」)。 * 依赖 Plan 使用 1. / 1、/ 1) 等形式;解析失败返回 [](由调用方回退为整篇一次 Build)。 */function parsePlanSteps(planText) { const body = String(planText ?? "").trim(); if (!body) { return []; } const lines = body.split(/\r?\n/); const numbered = []; const re = /^\s*(\d+)\s*[\.\)、::、]\s*(.+)$/; for (const line of lines) { const m = line.match(re); if (m) { const n = Number(m[1]); const text = m[2].trim().replace(/\*\*/g, ""); if (text) { numbered.push({ n, line: `${n}. ${text}` }); } } } if (numbered.length === 0) { return []; } numbered.sort((a, b) => a.n - b.n); return numbered.map((x) => x.line);}/** 与 ollama_langchain_chat.js 的 chunkText 一致 */function planStreamChunkText(chunk) { const c = chunk?.content; if (typeof c === "string") { return c; } if (Array.isArray(c)) { return c.map((b) => (typeof b === "string" ? b : b?.text ?? "")).join(""); } return "";}async function generatePlan(userTask) { console.log("⏳ ① 正在分解任务(Plan·无工具)…"); process.stdout.write("\n🤖: "); let text = ""; const stream = await planModel.stream([ new SystemMessage(PLAN_SYSTEM), new HumanMessage(userTask), ]); for await (const chunk of stream) { const piece = planStreamChunkText(chunk); if (piece) { process.stdout.write(piece); text += piece; } } console.log("\n"); const planText = stripCodeFences(text); if (!planText) { throw new Error("计划阶段未产生文字内容"); } console.log("✅ ① 分解任务完成(计划将用于②;默认随后自动执行任务)。"); return planText;}// ==================== ② ReAct 执行(阅读文字计划 + 工具) ====================const EXEC_SYSTEM = `你是 **② 执行任务(Build)**阶段的机器人控制代理,只能通过工具在模拟器中行动。你会收到「用户原始任务」、完整计划上下文,以及可能有的「本回合仅完成的若干条规划步骤」。- 若消息中列出**本回合步骤**,则**只**完成这些条目对应的工具链,不要做计划中更后面的步骤;完成后用一两句话总结本批结果。- **仅通过工具**落实:take_photo、detect_object、chassis_move、arm_grip- **chassis_move**:可用 **turn_then_forward** 一次完成「先按 angle_degrees 转向(正右负左),再前进 steps 步」;适合识别结果里的方位角+靠近;仍可用 turn_left/right + forward 分步- detect_object 的 imagePath 必须是**最近一次 take_photo 返回的完整文件路径**(消息里若给出「此前批次」路径须优先沿用,除非你新拍了一张)- 若本批步骤含糊,用最少合理工具补全;不要写 XML 伪调用- 在短句里可带规划序号,如「第 3 步:…」`;const checkpointer = new MemorySaver();const execAgent = createReactAgent({ llm: execBaseModel.bindTools(tools, { parallel_tool_calls: false }), tools, checkpointer, preModelHook: preModelEnsureToolMessageNames, postModelHook: postModelXmlToolFix, systemMessage: EXEC_SYSTEM,});function createFunctionCallsStreamFilter() { const OPEN = "<function_calls>"; const CLOSE = "</function_calls>"; let buf = ""; let inXml = false; function holdbackSuffix(s, token) { const sl = s.toLowerCase(); const tl = token.toLowerCase(); const max = Math.min(s.length, token.length - 1); for (let i = max; i >= 1; i--) { if (tl.startsWith(sl.slice(-i))) { return i; } } return 0; } return { push(raw) { if (raw == null || raw === "") { return ""; } buf += String(raw); let out = ""; while (buf.length > 0) { if (!inXml) { const i = buf.toLowerCase().indexOf(OPEN); if (i === -1) { const hb = holdbackSuffix(buf, OPEN); out += buf.slice(0, buf.length - hb); buf = buf.slice(buf.length - hb); break; } out += buf.slice(0, i); buf = buf.slice(i + OPEN.length); inXml = true; continue; } const j = buf.toLowerCase().indexOf(CLOSE); if (j === -1) { const hb = holdbackSuffix(buf, CLOSE); buf = buf.slice(0, buf.length - hb); break; } buf = buf.slice(j + CLOSE.length); inXml = false; } return out; }, flush() { if (inXml) { buf = ""; inXml = false; return ""; } const hb = holdbackSuffix(buf, OPEN); const emit = buf.slice(0, buf.length - hb); buf = buf.slice(buf.length - hb); return emit; }, };}/** * @param {object} [options] * @param {string[] | null} [options.batchSteps] 本回合只执行这些规划句;null 表示整篇计划一次执行 * @param {string} [options.progressSummary] 前几批的进展摘要(含拍照路径等) * @param {string} [options.threadSuffix] 区分各批 LangGraph thread_id * @returns {Promise<{ lastTakePhotoPath: string | null, toolCallCount: number }>} */async function executePlanWithAgent(userTask, planTextFull, options = {}) { const { batchSteps = null, progressSummary = "", threadSuffix = "" } = options; console.log("⏳ ② 正在执行任务(Build·ReAct)…"); let prompt = `用户原始任务:\n${userTask}\n\n`; if (progressSummary) { prompt += `【此前批次进展(须衔接)】\n${progressSummary}\n\n`; } if (batchSteps?.length) { prompt += `完整任务计划(上下文):\n${planTextFull}\n\n`; prompt += `【本回合仅完成下列规划步骤(逐项用工具;勿执行未列出步骤;做完后简短总结)】\n` + `${batchSteps.join("\n")}\n`; } else { prompt += `已经为您分解好的任务计划(请按序用工具落实;detect_object 使用上一轮 take_photo 的完整路径):\n` + `${planTextFull}`; } const config = { version: "v2", configurable: { thread_id: `plan-exec-${Date.now()}-${threadSuffix || "all"}` }, recursionLimit: RECURSION_LIMIT, }; const xmlStreamFilter = createFunctionCallsStreamFilter(); let execToolIndex = 0; let lastTakePhotoPath = null; try { const stream = await execAgent.streamEvents( { messages: [new HumanMessage(prompt)] }, config, ); for await (const event of stream) { if (event.event === "on_chat_model_stream") { const chunk = event.data?.chunk?.content; const pieces = Array.isArray(chunk) ? chunk.map((b) => (typeof b === "string" ? b : b?.text ?? "")) : typeof chunk === "string" ? [chunk] : []; for (const p of pieces) { const visible = xmlStreamFilter.push(p); if (visible) { process.stdout.write(visible); } } } if (event.event === "on_tool_start") { execToolIndex += 1; console.log( `\n【执行步骤 ${execToolIndex}】🔧 ${event.name} ${JSON.stringify(event.data.input)}`, ); } if (event.event === "on_tool_end") { const out = event.data?.output; let text; if (typeof out === "string") { text = out; } else if (out && typeof out === "object") { const c = out.content ?? out.kwargs?.content; text = typeof c === "string" ? c : Array.isArray(c) ? c.map((b) => b?.text ?? "").join("") : out.text ?? JSON.stringify(out); } else { text = String(out); } console.log(`【执行步骤 ${execToolIndex}】✅ ${text}`); if (event.name === "take_photo" && typeof text === "string") { const t = text.trim(); if (t.length > 0) { lastTakePhotoPath = t; } } } } } catch (err) { if (err?.lc_error_code === "GRAPH_RECURSION_LIMIT") { console.error( `\n⚠️ 图递归上限 ${RECURSION_LIMIT} 仍不够(或模型在空转)。` + "可提高 LANGGRAPH_RECURSION_LIMIT,或缩短单次任务。", ); } else { throw err; } } const tail = xmlStreamFilter.flush(); if (tail) { process.stdout.write(tail); } console.log("\n"); return { lastTakePhotoPath, toolCallCount: execToolIndex };}/** 将计划正文解析为步骤数组后,按 EXEC_BATCH_SIZE 多批调用 Build(每批新会话,靠摘要衔接) */async function runBuildWithBatches(userTask, planText) { const planSteps = parsePlanSteps(planText); if (planSteps.length === 0) { console.log( "\n⚠️ 未解析到编号步骤(需要 1. / 1、等形式)。本回合仍将**整篇计划一次**交给 Build。\n", ); await executePlanWithAgent(userTask, planText, {}); return; } console.log( `\n📋 规划步骤共 ${planSteps.length} 条;每批最多 ${EXEC_BATCH_SIZE} 条,共 ${Math.ceil(planSteps.length / EXEC_BATCH_SIZE)} 批。`, ); console.log(planSteps.map((s, i) => ` [${i + 1}] ${s}`).join("\n")); let progressSummary = ""; let lastPhoto = null; for (let i = 0; i < planSteps.length; i += EXEC_BATCH_SIZE) { const batch = planSteps.slice(i, i + EXEC_BATCH_SIZE); const batchNo = Math.floor(i / EXEC_BATCH_SIZE) + 1; const totalBatches = Math.ceil(planSteps.length / EXEC_BATCH_SIZE); console.log( `\n━━ ② 正在执行任务 · 第 ${batchNo}/${totalBatches} 批(本批 ${batch.length} 条规划)━━`, ); let ps = progressSummary; if (lastPhoto) { ps += `\n【衔接】最近一次 take_photo 路径: ${lastPhoto}\n`; } const { lastTakePhotoPath } = await executePlanWithAgent(userTask, planText, { batchSteps: batch, progressSummary: ps, threadSuffix: `b${batchNo}`, }); if (lastTakePhotoPath) { lastPhoto = lastTakePhotoPath; } progressSummary += `\n— 第 ${batchNo} 批已完成,对应规划:\n${batch.map((x) => ` • ${x}`).join("\n")}\n`; }}// ==================== REPL ====================function promptAnswer(rl, questionText) { return new Promise((resolve) => { rl.question(questionText, (answer) => resolve(String(answer ?? "").trim())); });}const rl = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: "🧑 任务: ",});console.log( "\n流程:① **正在分解任务**(Plan,无工具)→ 自动进入 ② **正在执行任务**(Build,模拟器工具)。中间默认无需操作。",);console.log( PLAN_MODEL === EXEC_MODEL ? `①② 共用模型: ${PLAN_MODEL}` : `① 分解任务模型: ${PLAN_MODEL} | ② 执行任务模型: ${EXEC_MODEL}`,);console.log( useOpenAiV1ForExec ? `② 执行任务 HTTP: ChatOpenAI(${openAiCompatBaseUrl},规避 Gemini 3 thought_signature 400)` : `② 执行任务 HTTP: ChatOllama(${BASE_URL})`,);console.log( `① 分解任务: ChatOllama temperature=0.7,默认不限制 numPredict。模型=${PLAN_MODEL}${ Number.isFinite(PLAN_NUM_PREDICT) && PLAN_NUM_PREDICT > 0 ? `;numPredict=${PLAN_NUM_PREDICT}` : "" }`,);console.log( "说明: Plan 中 1. 2. … 会解析为步骤数组;Build 按 EXEC_BATCH_SIZE 分批执行;chassis_move 含 turn_then_forward(先转角再前进)。",);console.log( "环境变量: PLAN_MODEL、EXEC_MODEL、EXEC_BATCH_SIZE、BUILD_CONFIRM、OLLAMA_BASE_URL、OLLAMA_OPENAI_V1、PLAN_ONLY、PLAN_NUM_PREDICT、LANGGRAPH_RECURSION_LIMIT",);console.log( "BUILD_CONFIRM=1:① 结束后人工选择 run/replan/skip 再进入②(调试);默认不设置则①②自动连续。",);console.log("输入 exit 退出");console.log("提示: 若①长时间无输出,请检查 OLLAMA_BASE_URL、网络与云端模型。\n");rl.prompt();rl.on("line", async (line) => { const userTask = line.trim(); if (userTask === "exit") { rl.close(); process.exit(0); } if (!userTask) { rl.prompt(); return; } try { console.log("\n━━ ① 正在分解任务(Plan·无工具)━━"); let planText = await generatePlan(userTask); if (PLAN_ONLY) { console.log("\n── 计划原文 ──\n" + planText + "\n"); const steps = parsePlanSteps(planText); if (steps.length > 0) { console.log("── 解析到的规划步骤数组(供②分批执行)──"); console.log(JSON.stringify(steps, null, 2)); } console.log("\n(PLAN_ONLY=1,不进入②执行任务)\n"); rl.prompt(); return; } if (!BUILD_CONFIRM) { console.log("\n━━ ② 正在执行任务(Build·模拟器工具,可分批)━━\n"); await runBuildWithBatches(userTask, planText); console.log("\n✅ 本轮结束(①②均已完成)\n"); rl.prompt(); return; } while (true) { const hint = "\n┌─ ① 分解任务已完成(BUILD_CONFIRM=1 调试)──────────────────\n" + "│ **run** 或 **回车** → 进入 ② 执行任务\n" + "│ **replan** → 同一任务重新生成计划\n" + "│ **skip** → 跳过②,回到任务输入\n" + "└────────────────────────────────────────────────────────────\n" + "请选择: "; const choice = (await promptAnswer(rl, hint)).toLowerCase(); if (choice === "replan") { planText = await generatePlan(userTask); continue; } if (choice === "skip" || choice === "n" || choice === "no") { console.log("已跳过②,未调用工具。\n"); break; } if (choice === "run" || choice === "" || choice === "y" || choice === "execute") { console.log("\n━━ ② 正在执行任务(Build·模拟器工具,可分批)━━"); await runBuildWithBatches(userTask, planText); break; } console.log("请输入 run / replan / skip,或直接回车。\n"); } console.log("\n✅ 本轮结束\n"); } catch (e) { console.error("❌", e.message ?? e); } rl.prompt();});