Refactor agent runtime: state machine, feedback processing, execution log
- Add state.rs with AgentState/Step/StepStatus/AgentPhase as single source of truth - Extract prompts to markdown files loaded via include_str! - Replace plan_steps table with execution_log + agent_state_snapshots - Implement user feedback processing with docker-build-cache plan diff: load snapshot → LLM revise_plan → diff (title, description) → invalidate from first mismatch → resume - run_agent_loop accepts optional initial_state for mid-execution resume - Broadcast plan step status (done/running/pending) to frontend on step transitions - Rewrite frontend types/components to match new API (ExecutionLogEntry, PlanStepInfo with status) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
842
src/agent.rs
842
src/agent.rs
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ use axum::{
|
||||
use serde::Deserialize;
|
||||
use crate::AppState;
|
||||
use crate::agent::AgentEvent;
|
||||
use crate::db::{Workflow, PlanStep, Comment};
|
||||
use crate::db::{Workflow, ExecutionLogEntry, Comment};
|
||||
use super::{ApiResult, db_err};
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
@@ -77,9 +77,9 @@ async fn create_workflow(
|
||||
async fn list_steps(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Path(workflow_id): Path<String>,
|
||||
) -> ApiResult<Vec<PlanStep>> {
|
||||
sqlx::query_as::<_, PlanStep>(
|
||||
"SELECT * FROM plan_steps WHERE workflow_id = ? ORDER BY step_order"
|
||||
) -> ApiResult<Vec<ExecutionLogEntry>> {
|
||||
sqlx::query_as::<_, ExecutionLogEntry>(
|
||||
"SELECT * FROM execution_log WHERE workflow_id = ? ORDER BY created_at"
|
||||
)
|
||||
.bind(&workflow_id)
|
||||
.fetch_all(&state.db.pool)
|
||||
|
||||
73
src/db.rs
73
src/db.rs
@@ -41,20 +41,6 @@ impl Database {
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"CREATE TABLE IF NOT EXISTS plan_steps (
|
||||
id TEXT PRIMARY KEY,
|
||||
workflow_id TEXT NOT NULL REFERENCES workflows(id),
|
||||
step_order INTEGER NOT NULL,
|
||||
description TEXT NOT NULL,
|
||||
command TEXT NOT NULL DEFAULT '',
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
output TEXT NOT NULL DEFAULT ''
|
||||
)"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"CREATE TABLE IF NOT EXISTS comments (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -73,27 +59,6 @@ impl Database {
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add created_at to plan_steps
|
||||
let _ = sqlx::query(
|
||||
"ALTER TABLE plan_steps ADD COLUMN created_at TEXT NOT NULL DEFAULT ''"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add kind to plan_steps ('plan' or 'log')
|
||||
let _ = sqlx::query(
|
||||
"ALTER TABLE plan_steps ADD COLUMN kind TEXT NOT NULL DEFAULT 'log'"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add plan_step_id to plan_steps (log entries reference their parent plan step)
|
||||
let _ = sqlx::query(
|
||||
"ALTER TABLE plan_steps ADD COLUMN plan_step_id TEXT NOT NULL DEFAULT ''"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add deleted column to projects
|
||||
let _ = sqlx::query(
|
||||
"ALTER TABLE projects ADD COLUMN deleted INTEGER NOT NULL DEFAULT 0"
|
||||
@@ -165,6 +130,34 @@ impl Database {
|
||||
.await;
|
||||
}
|
||||
|
||||
// New tables: agent_state_snapshots + execution_log
|
||||
sqlx::query(
|
||||
"CREATE TABLE IF NOT EXISTS agent_state_snapshots (
|
||||
id TEXT PRIMARY KEY,
|
||||
workflow_id TEXT NOT NULL REFERENCES workflows(id),
|
||||
step_order INTEGER NOT NULL,
|
||||
state_json TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"CREATE TABLE IF NOT EXISTS execution_log (
|
||||
id TEXT PRIMARY KEY,
|
||||
workflow_id TEXT NOT NULL REFERENCES workflows(id),
|
||||
step_order INTEGER NOT NULL,
|
||||
tool_name TEXT NOT NULL,
|
||||
tool_input TEXT NOT NULL DEFAULT '',
|
||||
output TEXT NOT NULL DEFAULT '',
|
||||
status TEXT NOT NULL DEFAULT 'running',
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)"
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(
|
||||
"CREATE TABLE IF NOT EXISTS timers (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -206,17 +199,15 @@ pub struct Workflow {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct PlanStep {
|
||||
pub struct ExecutionLogEntry {
|
||||
pub id: String,
|
||||
pub workflow_id: String,
|
||||
pub step_order: i32,
|
||||
pub description: String,
|
||||
pub command: String,
|
||||
pub status: String,
|
||||
pub tool_name: String,
|
||||
pub tool_input: String,
|
||||
pub output: String,
|
||||
pub status: String,
|
||||
pub created_at: String,
|
||||
pub kind: String,
|
||||
pub plan_step_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
|
||||
@@ -4,6 +4,7 @@ mod db;
|
||||
mod kb;
|
||||
mod llm;
|
||||
mod exec;
|
||||
mod state;
|
||||
mod timer;
|
||||
mod ws;
|
||||
|
||||
|
||||
25
src/prompts/execution.md
Normal file
25
src/prompts/execution.md
Normal file
@@ -0,0 +1,25 @@
|
||||
你是一个 AI 智能体,正处于【执行阶段】。请专注完成当前步骤的任务。
|
||||
|
||||
可用工具:
|
||||
- execute:执行 shell 命令
|
||||
- read_file / write_file / list_files:文件操作
|
||||
- start_service / stop_service:管理后台服务
|
||||
- update_requirement:更新项目需求
|
||||
- advance_step:完成当前步骤并进入下一步(必须提供摘要)
|
||||
- update_scratchpad:保存跨步骤持久化的关键信息
|
||||
|
||||
工作流程:
|
||||
1. 阅读下方的「当前步骤」描述
|
||||
2. 使用工具执行所需操作
|
||||
3. 完成后调用 advance_step(summary=...) 推进到下一步
|
||||
4. 最后一步完成后,直接回复简要总结(不调用工具)即可结束
|
||||
|
||||
环境信息:
|
||||
- 工作目录是独立的项目工作区,Python venv 已预先激活(.venv/)
|
||||
- 使用 `uv add <包名>` 或 `pip install <包名>` 安装依赖
|
||||
- 静态文件访问:/api/projects/{project_id}/files/{filename}
|
||||
- 后台服务访问:/api/projects/{project_id}/app/(启动命令需监听 0.0.0.0:$PORT)
|
||||
- 【重要】应用通过反向代理访问,前端 HTML/JS 中的 fetch/XHR 请求必须使用相对路径(如 fetch('todos')),绝对不能用 / 开头的路径(如 fetch('/todos')),否则会 404
|
||||
- 知识库工具:kb_search(query) 搜索相关片段,kb_read() 读取全文
|
||||
|
||||
请使用中文回复。
|
||||
32
src/prompts/feedback.md
Normal file
32
src/prompts/feedback.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# 用户反馈处理
|
||||
|
||||
你是项目 `{project_id}` 的 AI 执行引擎。用户对当前执行计划提交了反馈。
|
||||
|
||||
## 你的任务
|
||||
|
||||
1. 分析用户反馈的意图
|
||||
2. 决定是否需要修改计划
|
||||
|
||||
## 当前计划
|
||||
|
||||
{plan_state}
|
||||
|
||||
## 用户反馈
|
||||
|
||||
{feedback}
|
||||
|
||||
## 工具
|
||||
|
||||
- **revise_plan**:修改执行计划。提供完整的步骤列表(包括不需要修改的步骤)。
|
||||
- 已完成且不需要重做的步骤:保持 title 和 description 不变
|
||||
- 需要重做的步骤:修改 description 以反映新需求
|
||||
- 系统自动处理缓存:title 和 description 均未变的已完成步骤保留成果,**第一个 title 或 description 发生变化的步骤及其后续所有步骤**会重新执行
|
||||
- 你也可以增删步骤
|
||||
|
||||
- 如果反馈只是补充信息、不需要改计划,直接用文字回复即可(不调用工具)
|
||||
|
||||
## 规则
|
||||
|
||||
- 不要为了强制重跑而无意义地改 description。只在执行内容真正需要调整时才改
|
||||
- 可以在 description 中融入反馈信息,让执行步骤能看到用户的补充说明
|
||||
- 如果用户的反馈改变了整体方向,大胆重新规划
|
||||
28
src/prompts/planning.md
Normal file
28
src/prompts/planning.md
Normal file
@@ -0,0 +1,28 @@
|
||||
你是一个 AI 智能体,正处于【规划阶段】。你拥有一个独立的工作区目录。
|
||||
|
||||
你的任务:
|
||||
1. 仔细分析用户的需求
|
||||
2. 使用 list_files 和 read_file 检查工作区的现有状态
|
||||
3. 制定一个高层执行计划,调用 update_plan 提交
|
||||
|
||||
计划要求:
|
||||
- 每个步骤应是一个逻辑阶段(如"搭建环境"、"实现后端 API"),而非具体命令
|
||||
- 每个步骤包含简短标题和详细描述
|
||||
- 步骤数量合理(通常 3-8 步)
|
||||
|
||||
调用 update_plan 后,系统会自动进入执行阶段。
|
||||
|
||||
环境信息:
|
||||
- 工作目录是独立的项目工作区,Python venv 已预先激活(.venv/)
|
||||
- 可用工具:bash、git、curl、uv
|
||||
- 静态文件访问:/api/projects/{project_id}/files/{filename}
|
||||
- 后台服务访问:/api/projects/{project_id}/app/(反向代理,路径会被转发到应用的 /)
|
||||
|
||||
【重要】反向代理注意事项:
|
||||
- 用户通过 /api/projects/{project_id}/app/ 访问应用,请求被代理到应用的 / 路径
|
||||
- 因此前端 HTML 中的所有 API 请求必须使用【不带开头 / 的相对路径】
|
||||
- 正确示例:fetch('todos') 或 fetch('./todos') 错误示例:fetch('/todos') 或 fetch('/api/todos')
|
||||
- HTML 中的 <base> 标签不需要设置,只要不用绝对路径就行
|
||||
- 知识库工具:kb_search(query) 搜索相关片段,kb_read() 读取全文
|
||||
|
||||
请使用中文回复。
|
||||
14
src/prompts/report.md
Normal file
14
src/prompts/report.md
Normal file
@@ -0,0 +1,14 @@
|
||||
你是一个技术报告撰写者。请生成一份简洁的 Markdown 报告,总结工作流的执行结果。
|
||||
|
||||
报告应包含:
|
||||
1. 标题和简要总结
|
||||
2. 关键结果和产出(从步骤输出中提取重要信息)
|
||||
3. 如果启动了 Web 应用/服务(start_service),在报告顶部醒目标出应用访问地址:`/api/projects/{project_id}/app/`
|
||||
4. 生成的文件(如果有),引用地址为:`/api/projects/{project_id}/files/{filename}`
|
||||
5. 遇到的问题(如果有步骤失败)
|
||||
|
||||
格式要求:
|
||||
- 简洁明了,重点是结果而非过程
|
||||
- 使用 Markdown 格式(标题、代码块、表格、列表)
|
||||
- 需要可视化时,使用 ```mermaid 代码块绘制 Mermaid 图表
|
||||
- 使用中文撰写
|
||||
205
src/state.rs
Normal file
205
src/state.rs
Normal file
@@ -0,0 +1,205 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::llm::ChatMessage;
|
||||
|
||||
// --- Agent phase state machine ---
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum AgentPhase {
|
||||
Planning,
|
||||
Executing { step: i32 },
|
||||
Completed,
|
||||
}
|
||||
|
||||
// --- Step ---
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum StepStatus {
|
||||
Pending,
|
||||
Running,
|
||||
Done,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Step {
|
||||
pub order: i32,
|
||||
pub title: String,
|
||||
pub description: String,
|
||||
pub status: StepStatus,
|
||||
/// 完成后由 LLM 填入的一句话摘要
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub summary: Option<String>,
|
||||
/// 用户针对此步骤的反馈
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub user_feedbacks: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub db_id: String,
|
||||
}
|
||||
|
||||
// --- Core state ---
|
||||
|
||||
/// Agent 运行时的完整状态。整个结构体可以 JSON 序列化后直接存 DB。
|
||||
///
|
||||
/// 同时也是构建 LLM API call messages 的数据源:
|
||||
///
|
||||
/// Planning 阶段:
|
||||
/// [ system(planning_prompt), user(requirement), ...current_step_chat_history ]
|
||||
///
|
||||
/// Executing 阶段:
|
||||
/// [ system(execution_prompt), user(step_context), ...current_step_chat_history ]
|
||||
///
|
||||
/// step_context = requirement + plan 概览 + 当前步骤详情 + 已完成摘要 + scratchpad
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AgentState {
|
||||
/// 当前阶段
|
||||
pub phase: AgentPhase,
|
||||
/// LLM 生成的执行计划
|
||||
pub steps: Vec<Step>,
|
||||
/// 当前步骤内的多轮对话历史(assistant + tool result),
|
||||
/// 直接 extend 到 messages 尾部。在 step 切换时 clear。
|
||||
pub current_step_chat_history: Vec<ChatMessage>,
|
||||
/// LLM 的跨步骤工作区,由 agent 自己读写,step 切换时保留
|
||||
pub scratchpad: String,
|
||||
}
|
||||
|
||||
impl AgentState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
phase: AgentPhase::Planning,
|
||||
steps: Vec::new(),
|
||||
current_step_chat_history: Vec::new(),
|
||||
scratchpad: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// 当前正在执行的步骤号,Planning/Completed 时返回 0。
|
||||
pub fn current_step(&self) -> i32 {
|
||||
match &self.phase {
|
||||
AgentPhase::Executing { step } => *step,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Docker-build-cache 风格的 plan diff。
|
||||
/// 比较 (title, description),user_feedbacks 不参与比较。
|
||||
/// 第一个 mismatch 开始,该步骤及后续全部 invalidate → Pending。
|
||||
pub fn apply_plan_diff(&mut self, new_steps: Vec<Step>) {
|
||||
let old = &self.steps;
|
||||
let mut result = Vec::new();
|
||||
let mut invalidated = false;
|
||||
|
||||
for (i, new) in new_steps.into_iter().enumerate() {
|
||||
if !invalidated {
|
||||
if let Some(old_step) = old.get(i) {
|
||||
if old_step.title == new.title && old_step.description == new.description {
|
||||
// Cache hit: keep old status/summary, take new user_feedbacks
|
||||
result.push(Step {
|
||||
user_feedbacks: new.user_feedbacks,
|
||||
..old_step.clone()
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Cache miss or new step — invalidate from here
|
||||
invalidated = true;
|
||||
}
|
||||
result.push(Step {
|
||||
status: StepStatus::Pending,
|
||||
summary: None,
|
||||
..new
|
||||
});
|
||||
}
|
||||
|
||||
self.steps = result;
|
||||
}
|
||||
|
||||
/// 找到第一个需要执行的步骤 (Pending 或 Running)。
|
||||
/// 全部 Done 时返回 None。
|
||||
pub fn first_actionable_step(&self) -> Option<i32> {
|
||||
self.steps.iter()
|
||||
.find(|s| matches!(s.status, StepStatus::Pending | StepStatus::Running))
|
||||
.map(|s| s.order)
|
||||
}
|
||||
|
||||
/// 构建 Executing 阶段的 user message:
|
||||
/// requirement + plan 概览 + 当前步骤详情 + 已完成摘要 + scratchpad
|
||||
pub fn build_step_context(&self, requirement: &str) -> String {
|
||||
let mut ctx = String::new();
|
||||
|
||||
// 需求
|
||||
ctx.push_str("## 需求\n");
|
||||
ctx.push_str(requirement);
|
||||
ctx.push_str("\n\n");
|
||||
|
||||
// 计划概览
|
||||
ctx.push_str("## 计划概览\n");
|
||||
let cur = self.current_step();
|
||||
for s in &self.steps {
|
||||
let marker = match s.status {
|
||||
StepStatus::Done => " done",
|
||||
StepStatus::Running => " >> current",
|
||||
StepStatus::Failed => " FAILED",
|
||||
StepStatus::Pending => "",
|
||||
};
|
||||
ctx.push_str(&format!("{}. {}{}\n", s.order, s.title, marker));
|
||||
}
|
||||
ctx.push('\n');
|
||||
|
||||
// 当前步骤详情
|
||||
if let Some(s) = self.steps.iter().find(|s| s.order == cur) {
|
||||
ctx.push_str(&format!("## 当前步骤(步骤 {})\n", cur));
|
||||
ctx.push_str(&format!("标题:{}\n", s.title));
|
||||
ctx.push_str(&format!("描述:{}\n", s.description));
|
||||
if !s.user_feedbacks.is_empty() {
|
||||
ctx.push_str("\n用户反馈:\n");
|
||||
for fb in &s.user_feedbacks {
|
||||
ctx.push_str(&format!("- {}\n", fb));
|
||||
}
|
||||
}
|
||||
ctx.push('\n');
|
||||
}
|
||||
|
||||
// 已完成步骤摘要
|
||||
let done: Vec<_> = self.steps.iter()
|
||||
.filter(|s| matches!(s.status, StepStatus::Done))
|
||||
.collect();
|
||||
if !done.is_empty() {
|
||||
ctx.push_str("## 已完成步骤摘要\n");
|
||||
for s in done {
|
||||
let summary = s.summary.as_deref().unwrap_or("(no summary)");
|
||||
ctx.push_str(&format!("- 步骤 {}: {}\n", s.order, summary));
|
||||
}
|
||||
ctx.push('\n');
|
||||
}
|
||||
|
||||
// 备忘录
|
||||
if !self.scratchpad.is_empty() {
|
||||
ctx.push_str("## 备忘录\n");
|
||||
ctx.push_str(&self.scratchpad);
|
||||
ctx.push('\n');
|
||||
}
|
||||
|
||||
ctx
|
||||
}
|
||||
|
||||
/// 构建传给 LLM 的完整 messages 数组。
|
||||
pub fn build_messages(&self, system_prompt: &str, requirement: &str) -> Vec<ChatMessage> {
|
||||
let mut msgs = vec![ChatMessage::system(system_prompt)];
|
||||
|
||||
match &self.phase {
|
||||
AgentPhase::Planning => {
|
||||
msgs.push(ChatMessage::user(requirement));
|
||||
}
|
||||
AgentPhase::Executing { .. } => {
|
||||
msgs.push(ChatMessage::user(&self.build_step_context(requirement)));
|
||||
}
|
||||
AgentPhase::Completed => {}
|
||||
}
|
||||
|
||||
msgs.extend(self.current_step_chat_history.clone());
|
||||
msgs
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user