Refactor agent runtime: state machine, feedback processing, execution log

- Add state.rs with AgentState/Step/StepStatus/AgentPhase as single source of truth
- Extract prompts to markdown files loaded via include_str!
- Replace plan_steps table with execution_log + agent_state_snapshots
- Implement user feedback processing with a Docker-build-cache-style plan diff: load snapshot → LLM revise_plan → diff (title, description) → invalidate from first mismatch → resume
- run_agent_loop accepts optional initial_state for mid-execution resume
- Broadcast plan step status (done/running/pending) to frontend on step transitions
- Rewrite frontend types/components to match new API (ExecutionLogEntry, PlanStepInfo with status)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 08:54:43 +00:00
parent 7f6dafeab6
commit 46424cfbc4
16 changed files with 910 additions and 992 deletions
--- a/src/agent.rs
+++ b/src/agent.rs
--- a/src/api/workflows.rs
+++ b/src/api/workflows.rs
@@ -9,7 +9,7 @@ use axum::{
 use serde::Deserialize;
 use crate::AppState;
 use crate::agent::AgentEvent;
-use crate::db::{Workflow, PlanStep, Comment};
+use crate::db::{Workflow, ExecutionLogEntry, Comment};
 use super::{ApiResult, db_err};

 #[derive(serde::Serialize)]
@@ -77,9 +77,9 @@ async fn create_workflow(
 async fn list_steps(
    State(state): State<Arc<AppState>>,
    Path(workflow_id): Path<String>,
-) -> ApiResult<Vec<PlanStep>> {
-    sqlx::query_as::<_, PlanStep>(
-        "SELECT * FROM plan_steps WHERE workflow_id = ? ORDER BY step_order"
+) -> ApiResult<Vec<ExecutionLogEntry>> {
+    sqlx::query_as::<_, ExecutionLogEntry>(
+        "SELECT * FROM execution_log WHERE workflow_id = ? ORDER BY created_at"
    )
    .bind(&workflow_id)
    .fetch_all(&state.db.pool)
--- a/src/db.rs
+++ b/src/db.rs
@@ -41,20 +41,6 @@ impl Database {
        .execute(&self.pool)
        .await?;

-        sqlx::query(
-            "CREATE TABLE IF NOT EXISTS plan_steps (
-                id TEXT PRIMARY KEY,
-                workflow_id TEXT NOT NULL REFERENCES workflows(id),
-                step_order INTEGER NOT NULL,
-                description TEXT NOT NULL,
-                command TEXT NOT NULL DEFAULT '',
-                status TEXT NOT NULL DEFAULT 'pending',
-                output TEXT NOT NULL DEFAULT ''
-            )"
-        )
-        .execute(&self.pool)
-        .await?;
-
        sqlx::query(
            "CREATE TABLE IF NOT EXISTS comments (
                id TEXT PRIMARY KEY,
@@ -73,27 +59,6 @@ impl Database {
        .execute(&self.pool)
        .await;

-        // Migration: add created_at to plan_steps
-        let _ = sqlx::query(
-            "ALTER TABLE plan_steps ADD COLUMN created_at TEXT NOT NULL DEFAULT ''"
-        )
-        .execute(&self.pool)
-        .await;
-
-        // Migration: add kind to plan_steps ('plan' or 'log')
-        let _ = sqlx::query(
-            "ALTER TABLE plan_steps ADD COLUMN kind TEXT NOT NULL DEFAULT 'log'"
-        )
-        .execute(&self.pool)
-        .await;
-
-        // Migration: add plan_step_id to plan_steps (log entries reference their parent plan step)
-        let _ = sqlx::query(
-            "ALTER TABLE plan_steps ADD COLUMN plan_step_id TEXT NOT NULL DEFAULT ''"
-        )
-        .execute(&self.pool)
-        .await;
-
        // Migration: add deleted column to projects
        let _ = sqlx::query(
            "ALTER TABLE projects ADD COLUMN deleted INTEGER NOT NULL DEFAULT 0"
@@ -165,6 +130,34 @@ impl Database {
                .await;
        }

+        // New tables: agent_state_snapshots + execution_log
+        sqlx::query(
+            "CREATE TABLE IF NOT EXISTS agent_state_snapshots (
+                id TEXT PRIMARY KEY,
+                workflow_id TEXT NOT NULL REFERENCES workflows(id),
+                step_order INTEGER NOT NULL,
+                state_json TEXT NOT NULL,
+                created_at TEXT NOT NULL DEFAULT (datetime('now'))
+            )"
+        )
+        .execute(&self.pool)
+        .await?;
+
+        sqlx::query(
+            "CREATE TABLE IF NOT EXISTS execution_log (
+                id TEXT PRIMARY KEY,
+                workflow_id TEXT NOT NULL REFERENCES workflows(id),
+                step_order INTEGER NOT NULL,
+                tool_name TEXT NOT NULL,
+                tool_input TEXT NOT NULL DEFAULT '',
+                output TEXT NOT NULL DEFAULT '',
+                status TEXT NOT NULL DEFAULT 'running',
+                created_at TEXT NOT NULL DEFAULT (datetime('now'))
+            )"
+        )
+        .execute(&self.pool)
+        .await?;
+
        sqlx::query(
            "CREATE TABLE IF NOT EXISTS timers (
                id TEXT PRIMARY KEY,
@@ -206,17 +199,15 @@ pub struct Workflow {
 }

 #[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
-pub struct PlanStep {
+pub struct ExecutionLogEntry {
    pub id: String,
    pub workflow_id: String,
    pub step_order: i32,
-    pub description: String,
-    pub command: String,
-    pub status: String,
+    pub tool_name: String,
+    pub tool_input: String,
    pub output: String,
+    pub status: String,
    pub created_at: String,
-    pub kind: String,
-    pub plan_step_id: String,
 }

 #[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ mod db;
 mod kb;
 mod llm;
 mod exec;
+mod state;
 mod timer;
 mod ws;

--- a/src/prompts/execution.md
+++ b/src/prompts/execution.md
@@ -0,0 +1,25 @@
+你是一个 AI 智能体，正处于【执行阶段】。请专注完成当前步骤的任务。
+
+可用工具：
+- execute：执行 shell 命令
+- read_file / write_file / list_files：文件操作
+- start_service / stop_service：管理后台服务
+- update_requirement：更新项目需求
+- advance_step：完成当前步骤并进入下一步（必须提供摘要）
+- update_scratchpad：保存跨步骤持久化的关键信息
+
+工作流程：
+1. 阅读下方的「当前步骤」描述
+2. 使用工具执行所需操作
+3. 完成后调用 advance_step(summary=...) 推进到下一步
+4. 最后一步完成后，直接回复简要总结（不调用工具）即可结束
+
+环境信息：
+- 工作目录是独立的项目工作区，Python venv 已预先激活（.venv/）
+- 使用 `uv add <包名>` 或 `pip install <包名>` 安装依赖
+- 静态文件访问：/api/projects/{project_id}/files/{filename}
+- 后台服务访问：/api/projects/{project_id}/app/（启动命令需监听 0.0.0.0:$PORT）
+- 【重要】应用通过反向代理访问，前端 HTML/JS 中的 fetch/XHR 请求必须使用相对路径（如 fetch('todos')），绝对不能用 / 开头的路径（如 fetch('/todos')），否则会 404
+- 知识库工具：kb_search(query) 搜索相关片段，kb_read() 读取全文
+
+请使用中文回复。
--- a/src/prompts/feedback.md
+++ b/src/prompts/feedback.md
@@ -0,0 +1,32 @@
+# 用户反馈处理
+
+你是项目 `{project_id}` 的 AI 执行引擎。用户对当前执行计划提交了反馈。
+
+## 你的任务
+
+1. 分析用户反馈的意图
+2. 决定是否需要修改计划
+
+## 当前计划
+
+{plan_state}
+
+## 用户反馈
+
+{feedback}
+
+## 工具
+
+- **revise_plan**：修改执行计划。提供完整的步骤列表（包括不需要修改的步骤）。
+  - 已完成且不需要重做的步骤：保持 title 和 description 不变
+  - 需要重做的步骤：修改 description 以反映新需求
+  - 系统自动处理缓存：title 和 description 均未变的已完成步骤保留成果，**第一个 title 或 description 发生变化的步骤及其后续所有步骤**会重新执行
+  - 你也可以增删步骤
+
+- 如果反馈只是补充信息、不需要改计划，直接用文字回复即可（不调用工具）
+
+## 规则
+
+- 不要为了强制重跑而无意义地改 description。只在执行内容真正需要调整时才改
+- 可以在 description 中融入反馈信息，让执行步骤能看到用户的补充说明
+- 如果用户的反馈改变了整体方向，大胆重新规划
--- a/src/prompts/planning.md
+++ b/src/prompts/planning.md
@@ -0,0 +1,28 @@
+你是一个 AI 智能体，正处于【规划阶段】。你拥有一个独立的工作区目录。
+
+你的任务：
+1. 仔细分析用户的需求
+2. 使用 list_files 和 read_file 检查工作区的现有状态
+3. 制定一个高层执行计划，调用 update_plan 提交
+
+计划要求：
+- 每个步骤应是一个逻辑阶段（如"搭建环境"、"实现后端 API"），而非具体命令
+- 每个步骤包含简短标题和详细描述
+- 步骤数量合理（通常 3-8 步）
+
+调用 update_plan 后，系统会自动进入执行阶段。
+
+环境信息：
+- 工作目录是独立的项目工作区，Python venv 已预先激活（.venv/）
+- 可用工具：bash、git、curl、uv
+- 静态文件访问：/api/projects/{project_id}/files/{filename}
+- 后台服务访问：/api/projects/{project_id}/app/（反向代理，路径会被转发到应用的 /）
+
+【重要】反向代理注意事项：
+- 用户通过 /api/projects/{project_id}/app/ 访问应用，请求被代理到应用的 / 路径
+- 因此前端 HTML 中的所有 API 请求必须使用【不带开头 / 的相对路径】
+- 正确示例：fetch('todos') 或 fetch('./todos')    错误示例：fetch('/todos') 或 fetch('/api/todos')
+- HTML 中的 <base> 标签不需要设置，只要不用绝对路径就行
+- 知识库工具：kb_search(query) 搜索相关片段，kb_read() 读取全文
+
+请使用中文回复。
--- a/src/prompts/report.md
+++ b/src/prompts/report.md
@@ -0,0 +1,14 @@
+你是一个技术报告撰写者。请生成一份简洁的 Markdown 报告，总结工作流的执行结果。
+
+报告应包含：
+1. 标题和简要总结
+2. 关键结果和产出（从步骤输出中提取重要信息）
+3. 如果启动了 Web 应用/服务（start_service），在报告顶部醒目标出应用访问地址：`/api/projects/{project_id}/app/`
+4. 生成的文件（如果有），引用地址为：`/api/projects/{project_id}/files/{filename}`
+5. 遇到的问题（如果有步骤失败）
+
+格式要求：
+- 简洁明了，重点是结果而非过程
+- 使用 Markdown 格式（标题、代码块、表格、列表）
+- 需要可视化时，使用 ```mermaid 代码块绘制 Mermaid 图表
+- 使用中文撰写
--- a/src/state.rs
+++ b/src/state.rs
@@ -0,0 +1,205 @@
+use serde::{Deserialize, Serialize};
+
+use crate::llm::ChatMessage;
+
+// --- Agent phase state machine ---
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type")] // internally tagged: the variant name is serialized under a "type" key
+pub enum AgentPhase {
+    Planning, // AgentState::new() starts here; current_step() reports 0 in this phase
+    Executing { step: i32 }, // `step` is matched against `Step::order` to locate the current step
+    Completed, // current_step() reports 0 in this phase as well
+}
+
+// --- Step ---
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")] // serialized as "pending" / "running" / "done" / "failed"
+pub enum StepStatus {
+    Pending, // actionable; also the status given to every step invalidated by a plan diff
+    Running, // actionable; rendered as ">> current" in the plan overview
+    Done, // its summary is listed in the step context of later steps
+    Failed, // not actionable: first_actionable_step() skips it
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Step {
+    pub order: i32, // step number; compared with the executing step number to find the current step
+    pub title: String, // compared together with `description` when diffing an old plan against a new one
+    pub description: String,
+    pub status: StepStatus,
+    /// One-sentence summary filled in by the LLM once the step is finished
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub summary: Option<String>,
+    /// User feedback targeting this step
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub user_feedbacks: Vec<String>,
+    #[serde(default)]
+    pub db_id: String, // NOTE(review): presumably the id of a persisted row for this step — confirm against callers
+}
+
+// --- Core state ---
+
+/// Complete runtime state of the agent. The whole struct can be JSON-serialized and stored in the DB as-is.
+///
+/// It is also the data source for building the messages of an LLM API call:
+///
+/// Planning phase:
+///   [ system(planning_prompt), user(requirement), ...current_step_chat_history ]
+///
+/// Executing phase:
+///   [ system(execution_prompt), user(step_context), ...current_step_chat_history ]
+///
+///   step_context = requirement + plan overview + current step details + completed-step summaries + scratchpad
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AgentState {
+    /// Current phase
+    pub phase: AgentPhase,
+    /// Execution plan generated by the LLM
+    pub steps: Vec<Step>,
+    /// Multi-turn chat history within the current step (assistant + tool result), appended as-is to the
+    /// end of the messages. Meant to be cleared on step change; this file never clears it itself.
+    pub current_step_chat_history: Vec<ChatMessage>,
+    /// Cross-step workspace for the LLM, read and written by the agent itself; kept across step changes
+    pub scratchpad: String,
+}
+
+impl AgentState {
+    /// Creates an empty state in the `Planning` phase: no steps, no chat history, empty scratchpad.
+    pub fn new() -> Self {
+        Self {
+            phase: AgentPhase::Planning,
+            steps: Vec::new(),
+            current_step_chat_history: Vec::new(),
+            scratchpad: String::new(),
+        }
+    }
+
+    /// Number of the step currently being executed; returns 0 during `Planning` and `Completed`.
+    pub fn current_step(&self) -> i32 {
+        match &self.phase {
+            AgentPhase::Executing { step } => *step,
+            // Spelled out instead of `_` so that adding a phase forces a decision here.
+            AgentPhase::Planning | AgentPhase::Completed => 0,
+        }
+    }
+
+    /// Docker-build-cache style plan diff.
+    ///
+    /// Old and new steps are compared position by position on `(title, description)`;
+    /// `user_feedbacks` does not take part in the comparison.
+    ///
+    /// - While the pair matches, the old step is kept (status, summary, order, db_id) and only
+    ///   its `user_feedbacks` are replaced by the new step's.
+    /// - From the first mismatch on (including new steps that have no old counterpart), every
+    ///   step is taken from `new_steps` with its status reset to `Pending` and its summary cleared.
+    ///
+    /// Old steps beyond the length of `new_steps` are dropped.
+    pub fn apply_plan_diff(&mut self, new_steps: Vec<Step>) {
+        // The old plan is replaced wholesale, so take ownership and reuse cache-hit steps
+        // without cloning them.
+        let mut previous = std::mem::take(&mut self.steps).into_iter();
+        let mut merged = Vec::with_capacity(new_steps.len());
+        let mut invalidated = false;
+
+        for new in new_steps {
+            if !invalidated {
+                // `previous` advances in lockstep with `new_steps` until the first mismatch,
+                // so `next()` is always the old step at the same position.
+                match previous.next() {
+                    Some(old) if old.title == new.title && old.description == new.description => {
+                        // Cache hit: keep old status/summary, take new user_feedbacks
+                        merged.push(Step {
+                            user_feedbacks: new.user_feedbacks,
+                            ..old
+                        });
+                        continue;
+                    }
+                    // Cache miss or new step — invalidate from here
+                    _ => invalidated = true,
+                }
+            }
+            merged.push(Step {
+                status: StepStatus::Pending,
+                summary: None,
+                ..new
+            });
+        }
+
+        self.steps = merged;
+    }
+
+    /// Returns the `order` of the first step that still needs work (`Pending` or `Running`).
+    ///
+    /// Returns `None` when no such step exists, i.e. the plan is empty or every step is
+    /// `Done` or `Failed` — `Failed` steps are not considered actionable.
+    pub fn first_actionable_step(&self) -> Option<i32> {
+        self.steps.iter()
+            .find(|s| matches!(s.status, StepStatus::Pending | StepStatus::Running))
+            .map(|s| s.order)
+    }
+
+    /// Builds the user message for the Executing phase:
+    /// requirement + plan overview + current step details + completed-step summaries + scratchpad.
+    ///
+    /// The "current step" section is emitted only when a step whose `order` equals
+    /// `current_step()` exists; the summaries and scratchpad sections only when non-empty.
+    pub fn build_step_context(&self, requirement: &str) -> String {
+        let mut ctx = String::new();
+
+        // Requirement
+        ctx.push_str("## 需求\n");
+        ctx.push_str(requirement);
+        ctx.push_str("\n\n");
+
+        // Plan overview: one line per step, annotated from the step's own status
+        ctx.push_str("## 计划概览\n");
+        let cur = self.current_step();
+        for s in &self.steps {
+            let marker = match s.status {
+                StepStatus::Done => "  done",
+                StepStatus::Running => "  >> current",
+                StepStatus::Failed => "  FAILED",
+                StepStatus::Pending => "",
+            };
+            ctx.push_str(&format!("{}. {}{}\n", s.order, s.title, marker));
+        }
+        ctx.push('\n');
+
+        // Current step details
+        if let Some(s) = self.steps.iter().find(|s| s.order == cur) {
+            ctx.push_str(&format!("## 当前步骤（步骤 {}）\n", cur));
+            ctx.push_str(&format!("标题：{}\n", s.title));
+            ctx.push_str(&format!("描述：{}\n", s.description));
+            if !s.user_feedbacks.is_empty() {
+                ctx.push_str("\n用户反馈：\n");
+                for fb in &s.user_feedbacks {
+                    ctx.push_str(&format!("- {}\n", fb));
+                }
+            }
+            ctx.push('\n');
+        }
+
+        // Summaries of completed steps; peek first so the heading is only written when
+        // at least one step is Done, without collecting into a temporary Vec.
+        let mut done = self.steps.iter()
+            .filter(|s| matches!(s.status, StepStatus::Done))
+            .peekable();
+        if done.peek().is_some() {
+            ctx.push_str("## 已完成步骤摘要\n");
+            for s in done {
+                let summary = s.summary.as_deref().unwrap_or("(no summary)");
+                ctx.push_str(&format!("- 步骤 {}: {}\n", s.order, summary));
+            }
+            ctx.push('\n');
+        }
+
+        // Scratchpad
+        if !self.scratchpad.is_empty() {
+            ctx.push_str("## 备忘录\n");
+            ctx.push_str(&self.scratchpad);
+            ctx.push('\n');
+        }
+
+        ctx
+    }
+
+    /// Builds the complete messages array passed to the LLM:
+    /// the system prompt, then a phase-dependent user message (the raw requirement while
+    /// planning, the step context while executing, nothing once completed), then the
+    /// current step's chat history.
+    pub fn build_messages(&self, system_prompt: &str, requirement: &str) -> Vec<ChatMessage> {
+        let mut msgs = vec![ChatMessage::system(system_prompt)];
+
+        match &self.phase {
+            AgentPhase::Planning => {
+                msgs.push(ChatMessage::user(requirement));
+            }
+            AgentPhase::Executing { .. } => {
+                msgs.push(ChatMessage::user(&self.build_step_context(requirement)));
+            }
+            AgentPhase::Completed => {}
+        }
+
+        // Clone message by message instead of cloning the whole Vec first.
+        msgs.extend(self.current_step_chat_history.iter().cloned());
+        msgs
+    }
+}
+
+impl Default for AgentState {
+    /// Equivalent to [`AgentState::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}