Refactor agent runtime: state machine, feedback processing, execution log

- Add state.rs with AgentState/Step/StepStatus/AgentPhase as single source of truth
- Extract prompts to markdown files loaded via include_str!
- Replace plan_steps table with execution_log + agent_state_snapshots
- Implement user feedback processing with docker-build-cache plan diff:
  load snapshot → LLM revise_plan → diff (title, description) → invalidate from first mismatch → resume
- run_agent_loop accepts optional initial_state for mid-execution resume
- Broadcast plan step status (done/running/pending) to frontend on step transitions
- Rewrite frontend types/components to match new API (ExecutionLogEntry, PlanStepInfo with status)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 08:54:43 +00:00
parent 7f6dafeab6
commit 46424cfbc4
16 changed files with 910 additions and 992 deletions

205
src/state.rs Normal file
View File

@@ -0,0 +1,205 @@
use serde::{Deserialize, Serialize};
use crate::llm::ChatMessage;
// --- Agent phase state machine ---
/// Lifecycle phase of the agent. Serialized with an internal `"type"` tag,
/// e.g. `{"type":"Executing","step":2}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AgentPhase {
/// Generating the execution plan.
Planning,
/// Working through the plan; `step` is matched against `Step::order`
/// to locate the step being executed.
Executing { step: i32 },
/// All work finished.
Completed,
}
// --- Step ---
/// Execution status of a single plan step. Serialized in snake_case
/// (e.g. `"pending"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
/// Not started yet; also the state a step is reset to when a plan diff
/// invalidates it.
Pending,
/// Currently being executed.
Running,
/// Finished successfully.
Done,
/// Execution failed.
Failed,
}
/// One step of the LLM-generated execution plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Step {
/// Position within the plan; matched against `AgentPhase::Executing { step }`.
pub order: i32,
pub title: String,
pub description: String,
pub status: StepStatus,
/// One-sentence summary filled in by the LLM after the step completes.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub summary: Option<String>,
/// User feedback targeted at this specific step.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub user_feedbacks: Vec<String>,
// NOTE(review): presumably the persistence-layer row id; defaults to an
// empty string until stored — confirm against the DB code.
#[serde(default)]
pub db_id: String,
}
// --- Core state ---
/// Complete runtime state of the agent. The whole struct can be
/// JSON-serialized and written to the DB as-is.
///
/// It is also the data source for building the LLM API call messages:
///
/// Planning phase:
/// [ system(planning_prompt), user(requirement), ...current_step_chat_history ]
///
/// Executing phase:
/// [ system(execution_prompt), user(step_context), ...current_step_chat_history ]
///
/// step_context = requirement + plan overview + current step details
///              + completed-step summaries + scratchpad
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentState {
/// Current phase.
pub phase: AgentPhase,
/// The execution plan generated by the LLM.
pub steps: Vec<Step>,
/// Multi-turn chat history within the current step (assistant + tool
/// results). Extended onto the tail of the outgoing messages; cleared
/// when switching steps.
pub current_step_chat_history: Vec<ChatMessage>,
/// Cross-step workspace read and written by the agent itself;
/// preserved across step switches.
pub scratchpad: String,
}
impl Default for AgentState {
    /// Same as [`AgentState::new`]; satisfies clippy's `new_without_default`.
    fn default() -> Self {
        Self::new()
    }
}

impl AgentState {
    /// Fresh state: `Planning` phase, no steps, empty chat history and scratchpad.
    pub fn new() -> Self {
        Self {
            phase: AgentPhase::Planning,
            steps: Vec::new(),
            current_step_chat_history: Vec::new(),
            scratchpad: String::new(),
        }
    }

    /// Order of the step currently being executed.
    /// Returns 0 — a "no current step" sentinel — during `Planning`/`Completed`.
    pub fn current_step(&self) -> i32 {
        match &self.phase {
            AgentPhase::Executing { step } => *step,
            _ => 0,
        }
    }

    /// Docker-build-cache style plan diff.
    ///
    /// Steps are compared positionally on `(title, description)`;
    /// `user_feedbacks` never participates in the comparison. From the first
    /// mismatch onward, that step and every later one are invalidated back to
    /// `Pending` (with `summary` cleared). Matching prefix steps keep their
    /// old `status`/`summary`/`db_id` but adopt the new step's `user_feedbacks`.
    ///
    /// NOTE(review): a cache-hit step also keeps a `Failed` status, and
    /// `first_actionable_step` skips `Failed` — confirm that a failed step
    /// whose (title, description) is unchanged is intentionally never retried.
    pub fn apply_plan_diff(&mut self, new_steps: Vec<Step>) {
        let old = &self.steps;
        // Result has exactly one entry per incoming step — preallocate.
        let mut result = Vec::with_capacity(new_steps.len());
        let mut invalidated = false;
        for (i, new) in new_steps.into_iter().enumerate() {
            if !invalidated {
                if let Some(old_step) = old.get(i) {
                    if old_step.title == new.title && old_step.description == new.description {
                        // Cache hit: keep old status/summary, take new user_feedbacks.
                        result.push(Step {
                            user_feedbacks: new.user_feedbacks,
                            ..old_step.clone()
                        });
                        continue;
                    }
                }
                // Cache miss, or a brand-new trailing step — invalidate from here on.
                invalidated = true;
            }
            result.push(Step {
                status: StepStatus::Pending,
                summary: None,
                ..new
            });
        }
        self.steps = result;
    }

    /// First step that still needs work (`Pending` or `Running`).
    /// Returns `None` when no step qualifies (all `Done` — or `Failed`;
    /// see the note on [`Self::apply_plan_diff`]).
    pub fn first_actionable_step(&self) -> Option<i32> {
        self.steps
            .iter()
            .find(|s| matches!(s.status, StepStatus::Pending | StepStatus::Running))
            .map(|s| s.order)
    }

    /// Build the user message for the `Executing` phase:
    /// requirement + plan overview + current step details
    /// + completed-step summaries + scratchpad.
    /// Section headings are in Chinese by design (they are part of the prompt).
    pub fn build_step_context(&self, requirement: &str) -> String {
        let mut ctx = String::new();
        // Requirement
        ctx.push_str("## 需求\n");
        ctx.push_str(requirement);
        ctx.push_str("\n\n");
        // Plan overview: one line per step with a status marker.
        ctx.push_str("## 计划概览\n");
        let cur = self.current_step();
        for s in &self.steps {
            let marker = match s.status {
                StepStatus::Done => " done",
                StepStatus::Running => " >> current",
                StepStatus::Failed => " FAILED",
                StepStatus::Pending => "",
            };
            ctx.push_str(&format!("{}. {}{}\n", s.order, s.title, marker));
        }
        ctx.push('\n');
        // Current step details (absent while Planning/Completed, since no
        // step has order 0).
        if let Some(s) = self.steps.iter().find(|s| s.order == cur) {
            // Fixed: the heading previously lacked its closing fullwidth ")".
            ctx.push_str(&format!("## 当前步骤(步骤 {}\n", cur));
            ctx.push_str(&format!("标题:{}\n", s.title));
            ctx.push_str(&format!("描述:{}\n", s.description));
            if !s.user_feedbacks.is_empty() {
                ctx.push_str("\n用户反馈:\n");
                for fb in &s.user_feedbacks {
                    ctx.push_str(&format!("- {}\n", fb));
                }
            }
            ctx.push('\n');
        }
        // Summaries of completed steps.
        let done: Vec<_> = self
            .steps
            .iter()
            .filter(|s| matches!(s.status, StepStatus::Done))
            .collect();
        if !done.is_empty() {
            ctx.push_str("## 已完成步骤摘要\n");
            for s in done {
                let summary = s.summary.as_deref().unwrap_or("(no summary)");
                ctx.push_str(&format!("- 步骤 {}: {}\n", s.order, summary));
            }
            ctx.push('\n');
        }
        // Scratchpad.
        if !self.scratchpad.is_empty() {
            ctx.push_str("## 备忘录\n");
            ctx.push_str(&self.scratchpad);
            ctx.push('\n');
        }
        ctx
    }

    /// Build the full `messages` array for the LLM call: system prompt, then
    /// a phase-specific user message (the raw requirement while `Planning`,
    /// the step context while `Executing`, nothing when `Completed`), then
    /// the current step's chat history.
    pub fn build_messages(&self, system_prompt: &str, requirement: &str) -> Vec<ChatMessage> {
        let mut msgs = vec![ChatMessage::system(system_prompt)];
        match &self.phase {
            AgentPhase::Planning => msgs.push(ChatMessage::user(requirement)),
            AgentPhase::Executing { .. } => {
                msgs.push(ChatMessage::user(&self.build_step_context(requirement)));
            }
            AgentPhase::Completed => {}
        }
        msgs.extend(self.current_step_chat_history.iter().cloned());
        msgs
    }
}