refactor: server no longer runs agent loop or LLM

- Remove agent_loop from server (was ~400 lines) — server dispatches to workers
- AgentManager simplified to pure dispatcher (send_event → worker)
- Remove LLM config requirement from server (workers bring their own via config.yaml)
- Remove process_feedback, build_feedback_tools from server
- Remove chat API endpoint (LLM on workers only)
- Remove service proxy (services run on workers)
- Worker reads LLM config from its own config.yaml
- ws_worker.rs handles WorkerToServer::Update messages (DB + broadcast)
- Verified locally: tori server + tori worker connect and register
This commit is contained in:
2026-04-06 13:18:21 +01:00
parent dfedb6dd45
commit decabc0e8a
9 changed files with 380 additions and 997 deletions

View File

@@ -1,17 +1,13 @@
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicU16, Ordering};
use serde::{Deserialize, Serialize};
use sqlx::sqlite::SqlitePool;
use tokio::sync::{mpsc, RwLock, broadcast};
use crate::llm::{LlmClient, ChatMessage, Tool, ToolFunction};
use crate::exec::LocalExecutor;
use crate::template::{self, LoadedTemplate};
use crate::tools::ExternalToolManager;
use crate::worker::WorkerManager;
use crate::LlmConfig;
use crate::sink::{AgentUpdate, ServiceManager};
use crate::state::{AgentState, AgentPhase, Artifact, Step, StepStatus, StepResult, StepResultStatus, check_scratchpad_size};
@@ -73,49 +69,23 @@ pub fn plan_infos_from_state(state: &AgentState) -> Vec<PlanStepInfo> {
}
/// Central per-project registry: routes agent events, fans WebSocket
/// messages out to UI clients, and (post-refactor) dispatches work to
/// remote workers via `worker_mgr`.
pub struct AgentManager {
    // Per-project event channels into the in-process agent loop.
    agents: RwLock<HashMap<String, mpsc::Sender<AgentEvent>>>,
    // Per-project broadcast channels for WsMessage fan-out to clients.
    broadcast: RwLock<HashMap<String, broadcast::Sender<WsMessage>>>,
    // Running service processes keyed by project id.
    pub services: RwLock<HashMap<String, ServiceInfo>>,
    // Monotonic port counter for project services (starts at 9100).
    next_port: AtomicU16,
    pool: SqlitePool,
    // NOTE(review): this diff view shows both removed server-side LLM
    // fields (llm_config etc., dropped per the commit message) and the
    // new worker_mgr field — confirm the surviving set in the final file.
    llm_config: LlmConfig,
    template_repo: Option<crate::TemplateRepoConfig>,
    kb: Option<Arc<crate::kb::KbManager>>,
    jwt_private_key_path: Option<String>,
    pub worker_mgr: Arc<WorkerManager>,
}
impl AgentManager {
/// Construct a manager and wrap it in an `Arc` so it can be shared
/// freely across tasks. All per-project maps start empty; service
/// ports are handed out sequentially from 9100.
pub fn new(
    pool: SqlitePool,
    llm_config: LlmConfig,
    template_repo: Option<crate::TemplateRepoConfig>,
    kb: Option<Arc<crate::kb::KbManager>>,
    jwt_private_key_path: Option<String>,
    worker_mgr: Arc<WorkerManager>,
) -> Arc<Self> {
    let manager = Self {
        agents: RwLock::new(HashMap::new()),
        broadcast: RwLock::new(HashMap::new()),
        services: RwLock::new(HashMap::new()),
        next_port: AtomicU16::new(9100),
        pool,
        llm_config,
        template_repo,
        kb,
        jwt_private_key_path,
        worker_mgr,
    };
    Arc::new(manager)
}
/// Reserve the next service port. `Relaxed` ordering suffices here:
/// we only need each caller to see a unique value, not to synchronize
/// any other memory.
pub fn allocate_port(&self) -> u16 {
    let port = self.next_port.fetch_add(1, Ordering::Relaxed);
    port
}
/// Look up the port of the running service for `project_id`, if any.
pub async fn get_service_port(&self, project_id: &str) -> Option<u16> {
    let services = self.services.read().await;
    let info = services.get(project_id)?;
    Some(info.port)
}
pub async fn get_broadcast(&self, project_id: &str) -> broadcast::Receiver<WsMessage> {
let mut map = self.broadcast.write().await;
let tx = map.entry(project_id.to_string())
@@ -123,403 +93,95 @@ impl AgentManager {
tx.subscribe()
}
/// NOTE(review): this appears to be the *removed* half of the diff — a
/// second `send_event` is defined further down, and this body is missing
/// its closing brace in the captured view. Kept byte-identical.
pub async fn send_event(self: &Arc<Self>, project_id: &str, event: AgentEvent) {
    // Fast path: an agent channel already exists for this project.
    let agents = self.agents.read().await;
    if let Some(tx) = agents.get(project_id) {
        let _ = tx.send(event).await;
    } else {
        // Drop the read guard before spawning (spawn_agent takes the
        // write lock), then retry the send on the fresh channel.
        drop(agents);
        self.spawn_agent(project_id.to_string()).await;
        let agents = self.agents.read().await;
        if let Some(tx) = agents.get(project_id) {
            let _ = tx.send(event).await;
        }
    }
/// Synchronously fetch (or lazily create) the broadcast sender for a
/// project. Called from sync code (ws_worker), so we bridge into the
/// async `RwLock` via `block_in_place` + `Handle::block_on`. Returning
/// a clone is fine: `broadcast::Sender` is `Clone` and all clones share
/// the same underlying channel.
///
/// NOTE(review): `tokio::task::block_in_place` panics on a
/// current-thread runtime — confirm the server always runs the
/// multi-threaded Tokio runtime.
pub fn get_broadcast_sender(&self, project_id: &str) -> broadcast::Sender<WsMessage> {
    tokio::task::block_in_place(|| {
        let rt = tokio::runtime::Handle::current();
        rt.block_on(async {
            let mut map = self.broadcast.write().await;
            map.entry(project_id.to_string())
                .or_insert_with(|| broadcast::channel(64).0)
                .clone()
        })
    })
}
// NOTE(review): in this diff view the removed `spawn_agent` and the new
// dispatcher `send_event` are interleaved — `send_event` appears nested
// inside `spawn_agent` (invalid: `self` params are not allowed in nested
// fns), and two stale lines below still reference the deleted
// `agent_loop`. Kept byte-identical; reconcile against the final file.
async fn spawn_agent(self: &Arc<Self>, project_id: String) {
    let (tx, rx) = mpsc::channel(32);
    self.agents.write().await.insert(project_id.clone(), tx);
    /// Dispatch an event to a worker.
    pub async fn send_event(self: &Arc<Self>, project_id: &str, event: AgentEvent) {
        match event {
            AgentEvent::NewRequirement { workflow_id, requirement, template_id } => {
                // Generate title (heuristic)
                let title = generate_title_heuristic(&requirement);
                let _ = sqlx::query("UPDATE projects SET name = ? WHERE id = ?")
                    .bind(&title).bind(project_id).execute(&self.pool).await;
                // Lazily create the project's broadcast channel, announce the title.
                let btx = {
                    let mut map = self.broadcast.write().await;
                    map.entry(project_id.to_string())
                        .or_insert_with(|| broadcast::channel(64).0)
                        .clone()
                };
                let _ = btx.send(WsMessage::ProjectUpdate {
                    project_id: project_id.to_string(),
                    name: title,
                });
                // NOTE(review): duplicate channel lookup — btx above already
                // holds a clone of the same sender.
                let broadcast_tx = {
                    let mut map = self.broadcast.write().await;
                    map.entry(project_id.clone())
                        .or_insert_with(|| broadcast::channel(64).0)
                        .clone()
                };
                // Update workflow status
                let _ = sqlx::query("UPDATE workflows SET status = 'executing' WHERE id = ?")
                    .bind(&workflow_id).execute(&self.pool).await;
                let _ = btx.send(WsMessage::WorkflowStatusUpdate {
                    workflow_id: workflow_id.clone(),
                    status: "executing".into(),
                });
                // NOTE(review): these two lines belong to the removed in-process
                // loop; `agent_loop` no longer exists per the commit message.
                let mgr = Arc::clone(self);
                tokio::spawn(agent_loop(project_id, rx, broadcast_tx, mgr));
                // Persist template_id
                if let Some(ref tid) = template_id {
                    let _ = sqlx::query("UPDATE workflows SET template_id = ? WHERE id = ?")
                        .bind(tid).bind(&workflow_id).execute(&self.pool).await;
                }
                // Dispatch to worker
                let assign = crate::worker::ServerToWorker::WorkflowAssign {
                    workflow_id: workflow_id.clone(),
                    project_id: project_id.to_string(),
                    requirement,
                    template_id,
                    initial_state: None,
                    require_plan_approval: false,
                };
                match self.worker_mgr.assign_workflow(assign).await {
                    Ok(name) => {
                        tracing::info!("Workflow {} dispatched to worker '{}'", workflow_id, name);
                    }
                    Err(e) => {
                        // No worker took the job: mark the workflow failed and
                        // surface the error to connected clients.
                        tracing::error!("Failed to dispatch workflow {}: {}", workflow_id, e);
                        let _ = sqlx::query("UPDATE workflows SET status = 'failed' WHERE id = ?")
                            .bind(&workflow_id).execute(&self.pool).await;
                        let _ = btx.send(WsMessage::WorkflowStatusUpdate {
                            workflow_id,
                            status: "failed".into(),
                        });
                        let _ = btx.send(WsMessage::Error {
                            message: format!("No worker available: {}", e),
                        });
                    }
                }
            }
            AgentEvent::Comment { workflow_id, content } => {
                // Comments are forwarded to whichever worker owns the workflow.
                if let Err(e) = self.worker_mgr.forward_comment(&workflow_id, &content).await {
                    tracing::warn!("Failed to forward comment for workflow {}: {}", workflow_id, e);
                }
            }
        }
    }
}
// Template system is in crate::template
/// Load the project's `INSTRUCTIONS.md`, returning an empty string when
/// the file is absent or unreadable.
async fn read_instructions(workdir: &str) -> String {
    match tokio::fs::read_to_string(format!("{}/INSTRUCTIONS.md", workdir)).await {
        Ok(text) => text,
        Err(_) => String::new(),
    }
}
/// Make sure the workspace directory exists and is bootstrapped.
///
/// A project-provided `scripts/setup.sh` takes precedence; otherwise a
/// Python virtualenv with a standard package set is created once
/// (skipped if `.venv` is already present). All failures are best-effort.
async fn ensure_workspace(exec: &LocalExecutor, workdir: &str) {
    let _ = tokio::fs::create_dir_all(workdir).await;
    let setup = format!("{}/scripts/setup.sh", workdir);
    if Path::new(&setup).exists() {
        tracing::info!("Running setup.sh in {}", workdir);
        let _ = exec.execute("bash scripts/setup.sh", workdir).await;
        return;
    }
    // No setup script: fall back to a one-time venv bootstrap.
    let venv = format!("{}/.venv", workdir);
    if Path::new(&venv).exists() {
        return;
    }
    let _ = exec.execute("uv venv .venv", workdir).await;
    let _ = exec.execute("uv pip install httpx fastapi uvicorn requests flask pydantic numpy pandas matplotlib pillow jinja2 pyyaml python-dotenv beautifulsoup4 lxml aiohttp aiofiles pytest rich click typer sqlalchemy", workdir).await;
}
/// In-process agent loop for a single project (legacy: the commit message
/// says this responsibility moves to workers). Consumes `AgentEvent`s
/// from `rx` until the channel closes, driving template setup, execution,
/// and feedback handling; all persistence goes through `pool`, all UI
/// updates through the `AgentUpdate` sink task spawned below.
async fn agent_loop(
    project_id: String,
    mut rx: mpsc::Receiver<AgentEvent>,
    broadcast_tx: broadcast::Sender<WsMessage>,
    mgr: Arc<AgentManager>,
) {
    let pool = mgr.pool.clone();
    let llm_config = mgr.llm_config.clone();
    let llm = LlmClient::new(&llm_config);
    let exec = LocalExecutor::new(mgr.jwt_private_key_path.clone());
    let workdir = format!("/app/data/workspaces/{}", project_id);
    let svc_mgr = ServiceManager::new(9100);
    // Create update channel and spawn handler (persists updates + broadcasts).
    let (update_tx, update_rx) = mpsc::channel::<AgentUpdate>(64);
    {
        let handler_pool = pool.clone();
        let handler_btx = broadcast_tx.clone();
        tokio::spawn(async move {
            crate::sink::handle_agent_updates(update_rx, handler_pool, handler_btx).await;
        });
    }
    tracing::info!("Agent loop started for project {}", project_id);
    while let Some(event) = rx.recv().await {
        match event {
            AgentEvent::NewRequirement { workflow_id, requirement, template_id: forced_template } => {
                tracing::info!("Processing new requirement for workflow {}", workflow_id);
                // Generate project title from requirement (heuristic, no LLM)
                {
                    let title = generate_title_heuristic(&requirement);
                    let _ = sqlx::query("UPDATE projects SET name = ? WHERE id = ?")
                        .bind(&title)
                        .bind(&project_id)
                        .execute(&pool)
                        .await;
                    let _ = broadcast_tx.send(WsMessage::ProjectUpdate {
                        project_id: project_id.clone(),
                        name: title,
                    });
                }
                let _ = update_tx.send(AgentUpdate::WorkflowStatus {
                    workflow_id: workflow_id.clone(),
                    status: "executing".into(),
                }).await;
                // Template: must be explicitly provided (no LLM selection)
                let template_id = forced_template;
                // Persist template_id to workflow
                if let Some(ref tid) = template_id {
                    let _ = sqlx::query("UPDATE workflows SET template_id = ? WHERE id = ?")
                        .bind(tid)
                        .bind(&workflow_id)
                        .execute(&pool)
                        .await;
                }
                // Materialize the template into the workspace. Repo templates
                // are extracted from git first; failures degrade to `None`
                // (the loop then runs without template instructions/tools).
                let loaded_template = if let Some(ref tid) = template_id {
                    tracing::info!("Template selected for workflow {}: {}", workflow_id, tid);
                    let _ = tokio::fs::create_dir_all(&workdir).await;
                    if template::is_repo_template(tid) {
                        // Repo template: extract from git then load
                        match template::extract_repo_template(tid, mgr.template_repo.as_ref()).await {
                            Ok(template_dir) => {
                                if let Err(e) = template::apply_template(&template_dir, &workdir).await {
                                    tracing::error!("Failed to apply repo template {}: {}", tid, e);
                                }
                                match LoadedTemplate::load_from_dir(tid, &template_dir).await {
                                    Ok(t) => Some(t),
                                    Err(e) => {
                                        tracing::error!("Failed to load repo template {}: {}", tid, e);
                                        None
                                    }
                                }
                            }
                            Err(e) => {
                                tracing::error!("Failed to extract repo template {}: {}", tid, e);
                                None
                            }
                        }
                    } else {
                        // Local built-in template
                        let template_dir = std::path::Path::new(template::templates_dir()).join(tid);
                        if let Err(e) = template::apply_template(&template_dir, &workdir).await {
                            tracing::error!("Failed to apply template {}: {}", tid, e);
                        }
                        match LoadedTemplate::load(tid).await {
                            Ok(t) => Some(t),
                            Err(e) => {
                                tracing::error!("Failed to load template {}: {}", tid, e);
                                None
                            }
                        }
                    }
                } else {
                    None
                };
                // Import KB files from template
                if let Some(ref t) = loaded_template {
                    if let Some(ref kb) = mgr.kb {
                        let mut batch_items: Vec<(String, String)> = Vec::new();
                        for (title, content) in &t.kb_files {
                            // Check if article already exists by title
                            let existing: Option<String> = sqlx::query_scalar(
                                "SELECT id FROM kb_articles WHERE title = ?"
                            )
                            .bind(title)
                            .fetch_optional(&pool)
                            .await
                            .ok()
                            .flatten();
                            // Upsert: update content in place, else insert a new row.
                            let article_id = if let Some(id) = existing {
                                let _ = sqlx::query(
                                    "UPDATE kb_articles SET content = ?, updated_at = datetime('now') WHERE id = ?"
                                )
                                .bind(content)
                                .bind(&id)
                                .execute(&pool)
                                .await;
                                id
                            } else {
                                let id = uuid::Uuid::new_v4().to_string();
                                let _ = sqlx::query(
                                    "INSERT INTO kb_articles (id, title, content) VALUES (?, ?, ?)"
                                )
                                .bind(&id)
                                .bind(title)
                                .bind(content)
                                .execute(&pool)
                                .await;
                                id
                            };
                            batch_items.push((article_id, content.clone()));
                        }
                        // Batch index: single embed.py call for all articles
                        if !batch_items.is_empty() {
                            if let Err(e) = kb.index_batch(&batch_items).await {
                                tracing::warn!("Failed to batch index KB articles: {}", e);
                            }
                        }
                        tracing::info!("Imported {} KB articles from template", t.kb_files.len());
                    }
                }
                ensure_workspace(&exec, &workdir).await;
                let _ = tokio::fs::write(format!("{}/requirement.md", workdir), &requirement).await;
                // Run template setup if present
                if let Some(ref tid) = template_id {
                    let template_dir = if template::is_repo_template(tid) {
                        template::extract_repo_template(tid, mgr.template_repo.as_ref())
                            .await
                            .ok()
                    } else {
                        Some(std::path::Path::new(template::templates_dir()).join(tid))
                    };
                    if let Some(ref tdir) = template_dir {
                        if let Err(e) = template::run_setup(tdir, &workdir).await {
                            tracing::error!("Template setup failed for {}: {}", tid, e);
                        }
                    }
                }
                // Template instructions win over a workspace INSTRUCTIONS.md.
                let instructions = if let Some(ref t) = loaded_template {
                    t.instructions.clone()
                } else {
                    read_instructions(&workdir).await
                };
                let ext_tools = loaded_template.as_ref().map(|t| &t.external_tools);
                let plan_approval = loaded_template.as_ref().map_or(false, |t| t.require_plan_approval);
                tracing::info!("Starting agent loop for workflow {}", workflow_id);
                // Run tool-calling agent loop
                let result = run_agent_loop(
                    &llm, &exec, &update_tx, &mut rx,
                    &project_id, &workflow_id, &requirement, &workdir, &svc_mgr,
                    &instructions, None, ext_tools,
                    plan_approval,
                ).await;
                let final_status = if result.is_ok() { "done" } else { "failed" };
                tracing::info!("Agent loop finished for workflow {}, status: {}", workflow_id, final_status);
                if let Err(e) = &result {
                    tracing::error!("Agent error for workflow {}: {}", workflow_id, e);
                    let _ = update_tx.send(AgentUpdate::Error {
                        message: format!("Agent error: {}", e),
                    }).await;
                }
                let _ = update_tx.send(AgentUpdate::WorkflowComplete {
                    workflow_id: workflow_id.clone(),
                    status: final_status.into(),
                    report: None, // Report generation will be handled separately
                }).await;
            }
            AgentEvent::Comment { workflow_id, content } => {
                tracing::info!("Comment on workflow {}: {}", workflow_id, content);
                // Unknown workflow ids are silently ignored.
                let wf = sqlx::query_as::<_, crate::db::Workflow>(
                    "SELECT * FROM workflows WHERE id = ?",
                )
                .bind(&workflow_id)
                .fetch_optional(&pool)
                .await
                .ok()
                .flatten();
                let Some(wf) = wf else { continue };
                // Load latest state snapshot
                let snapshot = sqlx::query_scalar::<_, String>(
                    "SELECT state_json FROM agent_state_snapshots WHERE workflow_id = ? ORDER BY created_at DESC LIMIT 1"
                )
                .bind(&workflow_id)
                .fetch_optional(&pool)
                .await
                .ok()
                .flatten();
                let mut state = snapshot
                    .and_then(|json| serde_json::from_str::<AgentState>(&json).ok())
                    .unwrap_or_else(AgentState::new);
                // Resume directly if: workflow is failed/done/waiting_user,
                // OR if state snapshot has a WaitingUser step (e.g. after pod restart)
                let has_waiting_step = state.steps.iter().any(|s| matches!(s.status, StepStatus::WaitingUser));
                let is_resuming = wf.status == "failed" || wf.status == "done"
                    || wf.status == "waiting_user" || has_waiting_step;
                if is_resuming {
                    // Reset Failed/WaitingUser steps so they get re-executed
                    for step in &mut state.steps {
                        if matches!(step.status, StepStatus::Failed) {
                            step.status = StepStatus::Pending;
                        }
                        if matches!(step.status, StepStatus::WaitingUser) {
                            // Mark as Running so it continues (not re-plans)
                            step.status = StepStatus::Running;
                        }
                    }
                    // Attach comment as feedback to the first actionable step
                    if let Some(order) = state.first_actionable_step() {
                        if let Some(step) = state.steps.iter_mut().find(|s| s.order == order) {
                            step.user_feedbacks.push(content.clone());
                        }
                    }
                    tracing::info!("[workflow {}] Resuming from state (status={}), first actionable: {:?}",
                        workflow_id, wf.status, state.first_actionable_step());
                } else {
                    // Active workflow: LLM decides whether to revise plan
                    state = process_feedback(
                        &llm, &update_tx,
                        &project_id, &workflow_id, state, &content,
                    ).await;
                }
                // If there are actionable steps, resume execution
                if state.first_actionable_step().is_some() {
                    ensure_workspace(&exec, &workdir).await;
                    let _ = update_tx.send(AgentUpdate::WorkflowStatus {
                        workflow_id: workflow_id.clone(),
                        status: "executing".into(),
                    }).await;
                    // Prepare state for execution: set first pending step to Running
                    if let Some(next) = state.first_actionable_step() {
                        let was_same_step = matches!(state.phase, AgentPhase::Executing { step } if step == next);
                        if let Some(step) = state.steps.iter_mut().find(|s| s.order == next) {
                            if matches!(step.status, StepStatus::Pending) {
                                step.status = StepStatus::Running;
                            }
                        }
                        state.phase = AgentPhase::Executing { step: next };
                        // Only clear chat history when advancing to a new step;
                        // keep it when resuming the same step after ask_user
                        if !was_same_step {
                            state.current_step_chat_history.clear();
                        }
                    }
                    let instructions = read_instructions(&workdir).await;
                    // Reload template config if available
                    let loaded_template = if !wf.template_id.is_empty() {
                        let tid = &wf.template_id;
                        if template::is_repo_template(tid) {
                            match template::extract_repo_template(tid, mgr.template_repo.as_ref()).await {
                                Ok(template_dir) => {
                                    LoadedTemplate::load_from_dir(tid, &template_dir).await.ok()
                                }
                                Err(e) => {
                                    tracing::warn!("Failed to reload template {}: {}", tid, e);
                                    None
                                }
                            }
                        } else {
                            LoadedTemplate::load(tid).await.ok()
                        }
                    } else {
                        None
                    };
                    let ext_tools = loaded_template.as_ref().map(|t| &t.external_tools);
                    let plan_approval = loaded_template.as_ref().map_or(false, |t| t.require_plan_approval);
                    // Resume execution from the prepared state snapshot.
                    let result = run_agent_loop(
                        &llm, &exec, &update_tx, &mut rx,
                        &project_id, &workflow_id, &wf.requirement, &workdir, &svc_mgr,
                        &instructions, Some(state), ext_tools,
                        plan_approval,
                    ).await;
                    let final_status = if result.is_ok() { "done" } else { "failed" };
                    if let Err(e) = &result {
                        let _ = update_tx.send(AgentUpdate::Error {
                            message: format!("Agent error: {}", e),
                        }).await;
                    }
                    let _ = update_tx.send(AgentUpdate::WorkflowComplete {
                        workflow_id: workflow_id.clone(),
                        status: final_status.into(),
                        report: None,
                    }).await;
                } else {
                    // No actionable steps — feedback was informational only
                    let _ = update_tx.send(AgentUpdate::WorkflowStatus {
                        workflow_id: workflow_id.clone(),
                        status: "done".into(),
                    }).await;
                }
            }
        }
    }
    tracing::info!("Agent loop ended for project {}", project_id);
}
// --- Tool definitions ---
fn make_tool(name: &str, description: &str, parameters: serde_json::Value) -> Tool {
@@ -750,49 +412,6 @@ fn build_step_user_message(step: &Step, completed_summaries: &[(i32, String, Str
ctx
}
/// Render the feedback prompt: the current plan (one line per step with a
/// status marker, plus its summary when present) substituted into the
/// `prompts/feedback.md` template along with the project id and feedback.
fn build_feedback_prompt(project_id: &str, state: &AgentState, feedback: &str) -> String {
    let mut plan_state = String::new();
    for step in &state.steps {
        // Status suffix rendered next to the step title.
        let marker = match step.status {
            StepStatus::Done => " [done]",
            StepStatus::Running => " [running]",
            StepStatus::WaitingUser => " [waiting]",
            StepStatus::Failed => " [FAILED]",
            StepStatus::Pending => "",
        };
        let mut line = format!("{}. {}{}\n {}\n", step.order, step.title, marker, step.description);
        if let Some(summary) = &step.summary {
            line.push_str(&format!(" 摘要: {}\n", summary));
        }
        plan_state.push_str(&line);
    }
    include_str!("prompts/feedback.md")
        .replace("{project_id}", project_id)
        .replace("{plan_state}", &plan_state)
        .replace("{feedback}", feedback)
}
/// Tools offered to the LLM while processing user feedback. Currently a
/// single tool, `revise_plan`, which replaces the full step list; the
/// system diffs it against the current plan afterwards.
fn build_feedback_tools() -> Vec<Tool> {
    // JSON schema for one plan step.
    let step_schema = serde_json::json!({
        "type": "object",
        "properties": {
            "title": { "type": "string", "description": "步骤标题" },
            "description": { "type": "string", "description": "详细描述" }
        },
        "required": ["title", "description"]
    });
    let params = serde_json::json!({
        "type": "object",
        "properties": {
            "steps": {
                "type": "array",
                "items": step_schema
            }
        },
        "required": ["steps"]
    });
    vec![make_tool(
        "revise_plan",
        "修改执行计划。提供完整步骤列表。系统自动 diffdescription 未变的已完成步骤保留成果,变化的步骤及后续重新执行。",
        params,
    )]
}
// --- Helpers ---
/// Truncate a string at a char boundary, returning at most `max_bytes` bytes.
@@ -956,91 +575,6 @@ async fn send_llm_call(
}).await;
}
/// Process user feedback: call LLM to decide whether to revise the plan.
/// Returns the (possibly modified) AgentState ready for resumed execution.
///
/// Failure modes are deliberately soft: if the LLM call fails or returns
/// no choices, the state is returned with the feedback merely attached.
async fn process_feedback(
    llm: &LlmClient,
    update_tx: &mpsc::Sender<AgentUpdate>,
    project_id: &str,
    workflow_id: &str,
    mut state: AgentState,
    feedback: &str,
) -> AgentState {
    let prompt = build_feedback_prompt(project_id, &state, feedback);
    let tools = build_feedback_tools();
    let messages = vec![
        ChatMessage::system(&prompt),
        ChatMessage::user(feedback),
    ];
    tracing::info!("[workflow {}] Processing feedback with LLM", workflow_id);
    let response = match llm.chat_with_tools(messages, &tools).await {
        Ok(r) => r,
        Err(e) => {
            // LLM unavailable: degrade gracefully — attach the feedback to
            // the first unfinished step and keep the plan unchanged.
            tracing::error!("[workflow {}] Feedback LLM call failed: {}", workflow_id, e);
            if let Some(step) = state.steps.iter_mut().find(|s| !matches!(s.status, StepStatus::Done)) {
                step.user_feedbacks.push(feedback.to_string());
            }
            return state;
        }
    };
    let choice = match response.choices.into_iter().next() {
        Some(c) => c,
        None => return state,
    };
    if let Some(tool_calls) = &choice.message.tool_calls {
        for tc in tool_calls {
            if tc.function.name == "revise_plan" {
                // Rebuild the step list from tool arguments (1-based orders);
                // malformed arguments fall back to an empty list.
                let args: serde_json::Value = serde_json::from_str(&tc.function.arguments).unwrap_or_default();
                let raw_steps = args["steps"].as_array().cloned().unwrap_or_default();
                let new_steps: Vec<Step> = raw_steps.iter().enumerate().map(|(i, item)| {
                    let order = (i + 1) as i32;
                    Step {
                        order,
                        title: item["title"].as_str().unwrap_or("").to_string(),
                        description: item["description"].as_str().unwrap_or("").to_string(),
                        status: StepStatus::Pending,
                        summary: None,
                        user_feedbacks: Vec::new(),
                        db_id: String::new(),
                        artifacts: Vec::new(),
                    }
                }).collect();
                // apply_plan_diff preserves results of completed steps whose
                // description did not change (see tool description).
                let diff = state.apply_plan_diff(new_steps);
                let _ = update_tx.send(AgentUpdate::PlanUpdate {
                    workflow_id: workflow_id.to_string(),
                    steps: plan_infos_from_state(&state),
                }).await;
                tracing::info!("[workflow {}] Plan revised via feedback. First actionable: {:?}",
                    workflow_id, state.first_actionable_step());
                let diff_display = format!("```diff\n{}\n```", diff);
                send_execution(update_tx, workflow_id, 0, "revise_plan", "计划变更", &diff_display, "done").await;
            }
        }
    } else {
        // No tool call: plain-text answer, surfaced as a text response.
        let text = choice.message.content.as_deref().unwrap_or("");
        tracing::info!("[workflow {}] Feedback processed, no plan change: {}", workflow_id, truncate_str(text, 200));
        send_execution(update_tx, workflow_id, state.current_step(), "text_response", "", text, "done").await;
    }
    // Record the feedback on the step that will run next (or the last step
    // when nothing is actionable), then snapshot the state.
    let target_order = state.first_actionable_step()
        .unwrap_or_else(|| state.steps.last().map(|s| s.order).unwrap_or(0));
    if let Some(step) = state.steps.iter_mut().find(|s| s.order == target_order) {
        step.user_feedbacks.push(feedback.to_string());
    }
    send_snapshot(update_tx, workflow_id, state.current_step(), &state).await;
    state
}
/// Run an isolated sub-loop for a single step. Returns StepResult.
#[allow(clippy::too_many_arguments)]
pub async fn run_step_loop(