refactor: worker mode — server offloads all LLM/exec to worker

- Split into `tori server` / `tori worker` subcommands (clap derive)
- Extract lib.rs for shared crate (agent, llm, exec, state, etc.)
- Introduce AgentUpdate channel to decouple agent loop from DB/broadcast
- New sink.rs: AgentUpdate enum + ServiceManager + handle_agent_updates
- New worker_runner.rs: connects to server WS, runs full agent loop
- Expand worker protocol: ServerToWorker (workflow_assign, comment)
  and WorkerToServer (register, result, update)
- Remove LLM from title generation (heuristic) and template selection
  (must be explicit)
- Remove KB tools (kb_search, kb_read) and remote worker tools
  (list_workers, execute_on_worker) from agent loop
- run_agent_loop/run_step_loop now take mpsc::Sender<AgentUpdate>
  instead of direct DB pool + broadcast sender
This commit is contained in:
2026-04-06 12:54:57 +01:00
parent 28a00dd2f3
commit e4ba385112
9 changed files with 1003 additions and 610 deletions

View File

@@ -1,77 +1,33 @@
mod api;
mod agent;
mod db;
mod kb;
mod llm;
mod exec;
pub mod state;
mod template;
mod timer;
mod tools;
mod worker;
mod ws;
mod ws_worker;
use std::sync::Arc;
use axum::Router;
use clap::{Parser, Subcommand};
use sqlx::sqlite::SqlitePool;
use tower_http::cors::CorsLayer;
use tower_http::services::{ServeDir, ServeFile};
pub struct AppState {
pub db: db::Database,
pub config: Config,
pub agent_mgr: Arc<agent::AgentManager>,
pub kb: Option<Arc<kb::KbManager>>,
pub obj_root: String,
pub auth: Option<api::auth::AuthConfig>,
use tori::{agent, api, db, kb, template, timer, worker, worker_runner, ws, ws_worker};
use tori::{AppState, Config};
// Top-level command-line interface; `main` obtains it via `Cli::parse()`.
// NOTE: plain `//` comments are used here on purpose. clap's derive macros
// turn `///` doc comments into help text, so adding them would change the
// program's `--help` output.
#[derive(Parser)]
#[command(name = "tori", about = "Tori AI agent orchestration")]
struct Cli {
// The subcommand chosen on the command line; `main` matches on it to decide
// whether to run the server or a worker.
#[command(subcommand)]
command: Command,
}
/// Top-level application configuration.
///
/// `run_server` reads `config.yaml` and deserializes its contents into this
/// struct with `serde_yaml`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct Config {
/// LLM settings; required key.
pub llm: LlmConfig,
/// Server settings; required key.
pub server: ServerConfig,
/// Database settings; `run_server` opens the database at `database.path`.
pub database: DatabaseConfig,
/// Optional template repository. A missing key deserializes to `None`; when
/// present, `run_server` hands it to `template::ensure_repo_ready`.
#[serde(default)]
pub template_repo: Option<TemplateRepoConfig>,
/// Path to EC private key PEM file for JWT signing
#[serde(default)]
pub jwt_private_key: Option<String>,
}
/// Settings describing the template repository.
///
/// When `Config::template_repo` is set, `run_server` passes this value to
/// `template::ensure_repo_ready` during startup.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct TemplateRepoConfig {
/// URL of the Gitea instance — presumably the base URL without the
/// owner/repo path; confirm against the `template` module.
pub gitea_url: String,
/// Repository owner — presumably a Gitea user or organisation name.
pub owner: String,
/// Repository name.
pub repo: String,
/// Local path of the repository. When the key is absent, serde fills it in
/// by calling `default_repo_path()`.
#[serde(default = "default_repo_path")]
pub local_path: String,
}
/// Serde default for `TemplateRepoConfig::local_path`.
///
/// Returns `/app/oseng-templates` when that path exists and is a directory;
/// otherwise returns the relative path `oseng-templates`.
fn default_repo_path() -> String {
    const APP_TEMPLATES_DIR: &str = "/app/oseng-templates";

    // Probe the filesystem once and pick whichever location applies.
    let app_dir = std::path::Path::new(APP_TEMPLATES_DIR);
    let chosen = if app_dir.is_dir() {
        APP_TEMPLATES_DIR
    } else {
        "oseng-templates"
    };
    String::from(chosen)
}
/// LLM settings, deserialized from the `llm` key of `Config`.
///
/// NOTE(review): the code that consumes these fields is not visible here, so
/// the field descriptions below are inferred from the names — confirm against
/// the `llm` module.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct LlmConfig {
/// Presumably the base URL of the LLM API endpoint.
pub base_url: String,
/// Presumably the credential sent to the LLM API. Because `Debug` is
/// derived, formatting this struct with `{:?}` prints the key in full.
pub api_key: String,
/// Presumably the model identifier to request.
pub model: String,
}
/// Server settings, deserialized from the `server` key of `Config`.
///
/// NOTE(review): the bind call is not visible in this part of the file;
/// presumably these form the listen address — confirm in `run_server`.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct ServerConfig {
/// Host name or IP address component of the address.
pub host: String,
/// Port number.
pub port: u16,
}
#[derive(Debug, Clone, serde::Deserialize)]
pub struct DatabaseConfig {
pub path: String,
// Subcommands of the `tori` binary; `main` dispatches on the parsed variant
// (`Server` -> `run_server`, `Worker` -> `worker_runner::run`).
// NOTE: the `///` lines inside this enum are picked up by clap as help text,
// so they are runtime strings rather than ordinary docs. Explanatory notes are
// therefore written as plain `//` comments.
#[derive(Subcommand)]
enum Command {
/// Start the API server
Server,
/// Start a worker that connects to the server
Worker {
/// Server WebSocket URL
// Taken from `--server` or the `TORI_SERVER` environment variable; the
// default value below applies when neither is given.
#[arg(long, env = "TORI_SERVER", default_value = "ws://127.0.0.1:3000/ws/tori/workers")]
server: String,
/// Worker name
// Taken from `--name` or `TORI_WORKER_NAME`. When absent, `main` falls back
// to the machine's host name and, if that lookup fails, to "worker-1".
#[arg(long, env = "TORI_WORKER_NAME")]
name: Option<String>,
},
}
#[tokio::main]
@@ -80,6 +36,22 @@ async fn main() -> anyhow::Result<()> {
.with_env_filter("tori=debug,tower_http=debug")
.init();
let cli = Cli::parse();
match cli.command {
Command::Server => run_server().await,
Command::Worker { server, name } => {
let name = name.unwrap_or_else(|| {
hostname::get()
.map(|h| h.to_string_lossy().to_string())
.unwrap_or_else(|_| "worker-1".to_string())
});
worker_runner::run(&server, &name).await
}
}
}
async fn run_server() -> anyhow::Result<()> {
let config_str = std::fs::read_to_string("config.yaml")
.expect("Failed to read config.yaml");
let config: Config = serde_yaml::from_str(&config_str)
@@ -88,7 +60,6 @@ async fn main() -> anyhow::Result<()> {
let database = db::Database::new(&config.database.path).await?;
database.migrate().await?;
// Initialize KB manager
let kb_arc = match kb::KbManager::new(database.pool.clone()) {
Ok(kb) => {
tracing::info!("KB manager initialized");
@@ -100,7 +71,6 @@ async fn main() -> anyhow::Result<()> {
}
};
// Ensure template repo is cloned before serving
if let Some(ref repo_cfg) = config.template_repo {
template::ensure_repo_ready(repo_cfg).await;
}
@@ -117,8 +87,6 @@ async fn main() -> anyhow::Result<()> {
);
timer::start_timer_runner(database.pool.clone(), agent_mgr.clone());
// Resume incomplete workflows after restart
resume_workflows(database.pool.clone(), agent_mgr.clone()).await;
let obj_root = std::env::var("OBJ_ROOT").unwrap_or_else(|_| "/data/obj".to_string());
@@ -129,7 +97,6 @@ async fn main() -> anyhow::Result<()> {
let public_url = std::env::var("PUBLIC_URL")
.unwrap_or_else(|_| "https://tori.euphon.cloud".to_string());
// Try TikTok SSO first, then Google OAuth
if let (Ok(id), Ok(secret)) = (
std::env::var("SSO_CLIENT_ID"),
std::env::var("SSO_CLIENT_SECRET"),
@@ -157,7 +124,7 @@ async fn main() -> anyhow::Result<()> {
public_url,
})
} else {
tracing::warn!("No OAuth configured (set SSO_CLIENT_ID/SSO_CLIENT_SECRET or GOOGLE_CLIENT_ID/GOOGLE_CLIENT_SECRET)");
tracing::warn!("No OAuth configured");
None
}
};
@@ -172,13 +139,10 @@ async fn main() -> anyhow::Result<()> {
});
let app = Router::new()
// Health check (public, for k8s probes)
.route("/tori/api/health", axum::routing::get(|| async {
axum::Json(serde_json::json!({"status": "ok"}))
}))
// Auth routes are public
.nest("/tori/api/auth", api::auth::router(state.clone()))
// Protected API routes
.nest("/tori/api", api::router(state.clone())
.layer(axum::middleware::from_fn_with_state(state.clone(), api::auth::require_auth))
)