Initial commit: repo-vis — 3D codebase visualization

Rust (axum) backend with git clone / zip upload / SQLite cache.
Three.js frontend with D3 treemap layout and semantic zoom.
Docker deployment with musl static binary.
This commit is contained in:
2026-04-06 13:30:12 +01:00
commit 7232d4cc37
16 changed files with 4334 additions and 0 deletions

1751
server/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

19
server/Cargo.toml Normal file
View File

@@ -0,0 +1,19 @@
[package]
name = "repo-vis-server"
version = "0.1.0"
edition = "2021"
[dependencies]
# Web framework; "multipart" enables the zip-upload endpoint.
axum = { version = "0.8", features = ["multipart"] }
# Async runtime for axum.
tokio = { version = "1", features = ["full"] }
# Static-file serving ("fs") for the frontend and CORS middleware.
tower-http = { version = "0.6", features = ["fs", "cors"] }
# JSON (de)serialization of the scan tree and API payloads.
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# SQLite scan cache; "bundled" compiles sqlite in (needed for musl static builds).
rusqlite = { version = "0.32", features = ["bundled"] }
walkdir = "2"
# Content hashing for cache keys.
sha2 = "0.10"
hex = "0.4"
# Zip-upload extraction.
zip = "2"
# Scratch dirs for clones/extractions, auto-removed on drop.
tempfile = "3"
tracing = "0.1"
tracing-subscriber = "0.3"

131
server/src/cache.rs Normal file
View File

@@ -0,0 +1,131 @@
use rusqlite::Connection;
use serde::Serialize;
use sha2::{Digest, Sha256};
use std::path::Path;
use std::sync::Mutex;
use std::time::{SystemTime, UNIX_EPOCH};
/// Cached scan results expire after this many seconds.
const TTL_SECS: u64 = 24 * 60 * 60; // 24 hours

/// A previously scanned repository, as listed by `list_repos`.
#[derive(Debug, Serialize)]
pub struct RepoEntry {
    pub name: String,
    pub source: String, // "git:url" or "zip:filename"
    // Number of leaf (file) nodes in the scanned tree.
    pub file_count: usize,
    // SHA-256 key into the scan_cache table.
    pub cache_key: String,
    // Unix timestamp (seconds) when the scan was recorded.
    pub created: u64,
}

/// SQLite-backed cache of scan results plus a registry of scanned repos.
/// The connection is serialized behind a Mutex so the cache can be shared
/// across concurrent handler tasks.
pub struct Cache {
    conn: Mutex<Connection>,
}
impl Cache {
    /// Open (or create) the SQLite database at `db_path` and ensure the
    /// schema exists.
    ///
    /// Panics if the database cannot be opened or the tables cannot be
    /// created — the server cannot run without its cache.
    pub fn new(db_path: &Path) -> Self {
        if let Some(parent) = db_path.parent() {
            // Best-effort: if this fails, Connection::open below reports
            // the clearer error.
            std::fs::create_dir_all(parent).ok();
        }
        let conn = Connection::open(db_path).expect("Failed to open cache database");
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS scan_cache (
                key TEXT PRIMARY KEY,
                data TEXT NOT NULL,
                created INTEGER NOT NULL
            );
            CREATE TABLE IF NOT EXISTS repos (
                cache_key TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                source TEXT NOT NULL,
                file_count INTEGER NOT NULL DEFAULT 0,
                created INTEGER NOT NULL
            );",
        )
        .expect("Failed to create tables");
        Cache {
            conn: Mutex::new(conn),
        }
    }

    /// Derive a stable cache key (hex SHA-256) from an arbitrary string
    /// such as "git:<url>" or "zip:<content-hash>".
    pub fn make_key(input: &str) -> String {
        let mut hasher = Sha256::new();
        hasher.update(input.as_bytes());
        hex::encode(hasher.finalize())
    }

    /// Look up a cached scan result. Returns `None` on a miss or when the
    /// entry has outlived `TTL_SECS`. Also opportunistically prunes
    /// expired rows.
    pub fn get(&self, key: &str) -> Option<String> {
        let conn = self.conn.lock().unwrap();
        let now = now_secs();
        // Prune old entries; failure here is non-fatal.
        conn.execute(
            "DELETE FROM scan_cache WHERE created < ?1",
            [now.saturating_sub(TTL_SECS)],
        )
        .ok();
        conn.query_row(
            "SELECT data, created FROM scan_cache WHERE key = ?1",
            [key],
            |row| {
                let data: String = row.get(0)?;
                let created: u64 = row.get(1)?;
                Ok((data, created))
            },
        )
        .ok()
        .and_then(|(data, created)| {
            // saturating_sub guards against clock skew: if the system clock
            // moved backwards, `created` can exceed `now`, and plain `-`
            // would panic in debug builds (wrap in release).
            if now.saturating_sub(created) < TTL_SECS {
                Some(data)
            } else {
                None
            }
        })
    }

    /// Store (or replace) a scan result under `key`, stamped with the
    /// current time. Errors are ignored — caching is best-effort.
    pub fn set(&self, key: &str, data: &str) {
        let conn = self.conn.lock().unwrap();
        conn.execute(
            "INSERT OR REPLACE INTO scan_cache (key, data, created) VALUES (?1, ?2, ?3)",
            rusqlite::params![key, data, now_secs()],
        )
        .ok();
    }

    /// Record a scanned repo in the registry shown by the repos listing.
    /// Errors are ignored — the registry is best-effort metadata.
    pub fn record_repo(&self, cache_key: &str, name: &str, source: &str, file_count: usize) {
        let conn = self.conn.lock().unwrap();
        conn.execute(
            "INSERT OR REPLACE INTO repos (cache_key, name, source, file_count, created) VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![cache_key, name, source, file_count, now_secs()],
        )
        .ok();
    }

    /// The 50 most recently scanned repos, newest first. Rows that fail to
    /// map are silently skipped.
    pub fn list_repos(&self) -> Vec<RepoEntry> {
        let conn = self.conn.lock().unwrap();
        let mut stmt = conn
            .prepare("SELECT cache_key, name, source, file_count, created FROM repos ORDER BY created DESC LIMIT 50")
            .expect("repos SELECT statement is valid SQL");
        stmt.query_map([], |row| {
            Ok(RepoEntry {
                cache_key: row.get(0)?,
                name: row.get(1)?,
                source: row.get(2)?,
                file_count: row.get(3)?,
                created: row.get(4)?,
            })
        })
        .expect("query with no parameters cannot fail to start")
        .filter_map(|r| r.ok())
        .collect()
    }
}
/// Current wall-clock time as whole seconds since the Unix epoch.
fn now_secs() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    since_epoch.as_secs()
}

213
server/src/main.rs Normal file
View File

@@ -0,0 +1,213 @@
mod cache;
mod scanner;
use axum::{
extract::{DefaultBodyLimit, Multipart, Path, State},
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use cache::{Cache, RepoEntry};
use scanner::{scan_dir, FileNode};
use serde::Deserialize;
use sha2::{Digest, Sha256};
use std::process::Command;
use std::sync::Arc;
use tempfile::TempDir;
use tower_http::services::ServeDir;
use tracing::info;
/// Shared application state handed to every request handler via axum's
/// `State` extractor.
struct AppState {
    cache: Cache,
}

/// JSON request body for the git-scan endpoint.
#[derive(Deserialize)]
struct GitRequest {
    url: String,
}
/// Count the leaf (file) nodes in a scan tree; directories contribute the
/// sum of their descendants, files count as one.
fn count_leaves(node: &FileNode) -> usize {
    node.children
        .as_ref()
        .map_or(1, |kids| kids.iter().map(count_leaves).sum())
}
#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    // Cache/database location; defaults to ./data for local runs.
    let data_dir_str = std::env::var("DATA_DIR").unwrap_or_else(|_| "data".to_string());
    let data_dir = std::path::Path::new(&data_dir_str);
    std::fs::create_dir_all(data_dir).ok();
    let state = Arc::new(AppState {
        cache: Cache::new(&data_dir.join("cache.db")),
    });

    // Static frontend assets; defaults to the web build output next door.
    let frontend_dir_str =
        std::env::var("FRONTEND_DIR").unwrap_or_else(|_| "../web/dist".to_string());
    let frontend_dir = std::path::Path::new(&frontend_dir_str);

    let app = Router::new()
        .route("/api/scan-git", post(scan_git))
        .route("/api/scan-zip", post(scan_zip))
        .route("/api/repos", get(list_repos))
        .route("/api/repos/{key}", get(get_repo))
        // Allow zip uploads up to 100 MiB.
        .layer(DefaultBodyLimit::max(100 * 1024 * 1024))
        .with_state(state)
        // Anything that isn't /api/* is served from the frontend bundle.
        .fallback_service(ServeDir::new(frontend_dir).append_index_html_on_directories(true));

    let port = std::env::var("PORT").unwrap_or_else(|_| "3000".to_string());
    let addr = format!("0.0.0.0:{port}");
    info!("repo-vis server running at http://localhost:{port}");
    // Panic with the address in the message so a taken port or bad PORT
    // value is immediately diagnosable (a bare unwrap hides the address).
    let listener = tokio::net::TcpListener::bind(&addr)
        .await
        .unwrap_or_else(|e| panic!("failed to bind {addr}: {e}"));
    axum::serve(listener, app)
        .await
        .expect("server terminated with an error");
}
/// GET /api/repos — the most recently scanned repositories, newest first.
async fn list_repos(State(state): State<Arc<AppState>>) -> Json<Vec<RepoEntry>> {
    let repos = state.cache.list_repos();
    Json(repos)
}
async fn get_repo(
State(state): State<Arc<AppState>>,
Path(key): Path<String>,
) -> Result<Json<FileNode>, (StatusCode, String)> {
state
.cache
.get(&key)
.and_then(|data| serde_json::from_str(&data).ok())
.map(Json)
.ok_or((StatusCode::NOT_FOUND, "Repo not found in cache".to_string()))
}
/// POST /api/scan-git — shallow-clone a repository and return its scanned
/// file tree. Results are cached by URL for `TTL_SECS`.
async fn scan_git(
    State(state): State<Arc<AppState>>,
    Json(req): Json<GitRequest>,
) -> Result<Json<FileNode>, (StatusCode, String)> {
    let url = req.url.trim().to_string();
    // Only accept schemes git itself understands; rejects local paths etc.
    if !url.starts_with("http://") && !url.starts_with("https://") && !url.starts_with("git@") {
        return Err((StatusCode::BAD_REQUEST, "Invalid git URL".to_string()));
    }

    // Serve from cache when a fresh scan of this URL exists.
    let key = Cache::make_key(&format!("git:{url}"));
    if let Some(cached) = state.cache.get(&key) {
        info!("Cache hit for {url}");
        let tree: FileNode = serde_json::from_str(&cached)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
        return Ok(Json(tree));
    }

    // Clone into a temp dir that is removed when `tmp` drops.
    let tmp = TempDir::new().map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    info!("Cloning {url} ...");
    // `--depth 1` keeps clones fast; `--` stops git from treating the URL
    // as an option.
    let output = Command::new("git")
        .args(["clone", "--depth", "1", "--", &url])
        .arg(tmp.path())
        .output()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("git clone failed: {e}"),
            )
        })?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("git clone failed: {stderr}"),
        ));
    }

    let mut tree = scan_dir(tmp.path(), tmp.path());
    // Display name = last path segment minus a single trailing ".git".
    // (trim_end_matches would strip the suffix repeatedly, mangling names
    // like "repo.git.git".)
    let trimmed = url.trim_end_matches('/');
    let trimmed = trimmed.strip_suffix(".git").unwrap_or(trimmed);
    let repo_name = trimmed.rsplit('/').next().unwrap_or("repo").to_string();
    tree.name = repo_name.clone();

    let file_count = count_leaves(&tree);
    // Best-effort: a failed serialization just means no cache entry.
    if let Ok(json_str) = serde_json::to_string(&tree) {
        state.cache.set(&key, &json_str);
        state.cache.record_repo(&key, &repo_name, &url, file_count);
    }
    Ok(Json(tree))
}
/// POST /api/scan-zip — accept a multipart zip upload, extract it, and
/// return the scanned file tree. Results are cached by archive content
/// hash, so re-uploading the same bytes under any name hits the cache.
async fn scan_zip(
    State(state): State<Arc<AppState>>,
    mut multipart: Multipart,
) -> Result<Json<FileNode>, (StatusCode, String)> {
    // The first multipart field is expected to carry the zip file.
    let field = multipart
        .next_field()
        .await
        .map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?
        .ok_or((StatusCode::BAD_REQUEST, "No file uploaded".to_string()))?;
    let file_name = field.file_name().unwrap_or("upload.zip").to_string();
    let data = field
        .bytes()
        .await
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Failed to read upload: {e}")))?;

    // Key on the content hash, not the filename.
    let mut hasher = Sha256::new();
    hasher.update(&data);
    let hash = hex::encode(hasher.finalize());
    let key = Cache::make_key(&format!("zip:{hash}"));
    if let Some(cached) = state.cache.get(&key) {
        info!("Cache hit for zip {file_name}");
        let tree: FileNode = serde_json::from_str(&cached)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
        return Ok(Json(tree));
    }

    // Extract into a temp dir that is removed when `tmp` drops.
    let tmp = TempDir::new().map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    let cursor = std::io::Cursor::new(&data);
    let mut archive = zip::ZipArchive::new(cursor)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid zip: {e}")))?;
    archive
        .extract(tmp.path())
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Extract failed: {e}")))?;

    // GitHub-style archives wrap everything in one top-level folder; if the
    // extraction produced exactly one directory, scan from inside it.
    let entries: Vec<_> = std::fs::read_dir(tmp.path())
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
        .filter_map(|e| e.ok())
        .collect();
    let scan_root =
        if entries.len() == 1 && entries[0].file_type().map(|t| t.is_dir()).unwrap_or(false) {
            entries[0].path()
        } else {
            tmp.path().to_path_buf()
        };

    let mut tree = scan_dir(&scan_root, &scan_root);
    // Drop a single trailing ".zip"; trim_end_matches would strip the
    // suffix repeatedly (e.g. "a.zip.zip" -> "a").
    let zip_name = file_name
        .strip_suffix(".zip")
        .unwrap_or(&file_name)
        .to_string();
    tree.name = zip_name.clone();

    let file_count = count_leaves(&tree);
    // Best-effort: a failed serialization just means no cache entry.
    if let Ok(json_str) = serde_json::to_string(&tree) {
        state.cache.set(&key, &json_str);
        state
            .cache
            .record_repo(&key, &zip_name, &format!("zip:{file_name}"), file_count);
    }
    Ok(Json(tree))
}

172
server/src/scanner.rs Normal file
View File

@@ -0,0 +1,172 @@
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fs;
use std::path::Path;
/// Files larger than this (256 KiB) are skipped — too big to render usefully.
const MAX_FILE_SIZE: u64 = 256 * 1024;

/// Directory (and file) names that are never descended into or included:
/// dependency caches, build output, VCS metadata, editor state.
static IGNORE_DIRS: &[&str] = &[
"node_modules", ".git", ".hg", ".svn", "dist", "build", "__pycache__",
".next", ".nuxt", ".output", "vendor", ".venv", "venv", "target",
".idea", ".vscode", ".cache",
];

/// File extensions treated as visualizable text (matched case-insensitively).
static TEXT_EXTENSIONS: &[&str] = &[
"js", "jsx", "ts", "tsx", "mjs", "cjs", "py", "pyw", "go", "rs", "c", "h", "cpp", "hpp",
"cc", "java", "kt", "scala", "rb", "php", "lua", "pl", "pm", "sh", "bash", "zsh", "fish",
"html", "htm", "css", "scss", "less", "sass", "json", "yaml", "yml", "toml", "ini", "cfg",
"xml", "svg", "md", "txt", "rst", "sql", "graphql", "gql", "vue", "svelte", "astro", "tf",
"hcl", "proto", "thrift", "r", "jl", "ex", "exs", "erl", "hrl", "zig", "nim", "v",
"swift", "m", "mm", "cs", "fs", "fsx", "hs", "lhs", "ml", "mli", "clj", "cljs", "cljc",
"edn", "el", "lisp", "scm", "rkt", "cmake",
];

/// Well-known text files with no (useful) extension, matched by exact name.
static SPECIAL_NAMES: &[&str] = &[
"Makefile", "Dockerfile", "Containerfile", "Vagrantfile", "Rakefile", "Gemfile",
"Brewfile", "Procfile", "CMakeLists.txt", "meson.build", "BUILD", "WORKSPACE",
".gitignore", ".dockerignore", ".editorconfig", ".env.example",
];
/// A node in the scanned tree: a directory (with `children: Some(..)`)
/// or a file leaf (with `children: None` and the line/content fields set).
/// Serialized as JSON for the frontend; empty optionals are omitted.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FileNode {
    pub name: String,
    // Path relative to the scan root ("." for the root itself).
    pub path: String,
    // Line count — present for files only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lines: Option<usize>,
    // 99th-percentile line length — present for files only.
    #[serde(rename = "maxLen", skip_serializing_if = "Option::is_none")]
    pub max_len: Option<usize>,
    // Full file text — present for files only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    // Child nodes — present for directories only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub children: Option<Vec<FileNode>>,
}
/// Decide whether a file should be scanned as text: either its exact name
/// is on the special-names list, or its extension (lowercased) is a known
/// text extension. Extension-less files that aren't special are rejected.
fn is_text_file(name: &str, ext: Option<&str>) -> bool {
    if SPECIAL_NAMES.contains(&name) {
        return true;
    }
    ext.map_or(false, |e| {
        TEXT_EXTENSIONS.contains(&e.to_ascii_lowercase().as_str())
    })
}
/// Recursively scan `dir` and return a tree of directories and text files.
///
/// `root` is the scan root used to compute relative paths. Entries whose
/// name is in `IGNORE_DIRS` are skipped, as are non-text files, empty
/// files, and files larger than `MAX_FILE_SIZE`; directories that end up
/// with no children are pruned.
pub fn scan_dir(dir: &Path, root: &Path) -> FileNode {
    // Build the ignore set once for the whole traversal; the original code
    // reallocated a fresh HashSet on every recursive call.
    let ignore_set: HashSet<&str> = IGNORE_DIRS.iter().copied().collect();
    scan_dir_inner(dir, root, &ignore_set)
}

/// Recursive worker for [`scan_dir`]; `ignore_set` is shared across the
/// whole traversal.
fn scan_dir_inner(dir: &Path, root: &Path, ignore_set: &HashSet<&str>) -> FileNode {
    let mut children = Vec::new();
    let entries = match fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => {
            // Unreadable directory: return an empty directory node instead
            // of failing the whole scan.
            return FileNode {
                name: dir_name(dir),
                path: rel_path(dir, root),
                lines: None,
                max_len: None,
                content: None,
                children: Some(Vec::new()),
            };
        }
    };
    // Sort by name for a deterministic tree across runs.
    let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
    sorted.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
    for entry in sorted {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();
        if ignore_set.contains(name_str.as_ref()) {
            continue;
        }
        let path = entry.path();
        let ft = match entry.file_type() {
            Ok(ft) => ft,
            Err(_) => continue,
        };
        if ft.is_dir() {
            let subtree = scan_dir_inner(&path, root, ignore_set);
            // Prune directories that contain nothing visualizable.
            if subtree
                .children
                .as_ref()
                .map(|c| !c.is_empty())
                .unwrap_or(false)
            {
                children.push(subtree);
            }
        } else if ft.is_file() {
            let ext = path.extension().and_then(|e| e.to_str());
            if !is_text_file(&name_str, ext) {
                continue;
            }
            let meta = match fs::metadata(&path) {
                Ok(m) => m,
                Err(_) => continue,
            };
            // Skip empty files and anything too large to render usefully.
            if meta.len() == 0 || meta.len() > MAX_FILE_SIZE {
                continue;
            }
            let content = match fs::read_to_string(&path) {
                Ok(c) => c,
                Err(_) => continue, // binary or non-UTF-8 despite the extension
            };
            let lines: Vec<&str> = content.split('\n').collect();
            let line_count = lines.len();
            // P99 line length (min 1) so a single very long line doesn't
            // distort the treemap layout.
            let max_len = percentile_line_len(&lines, 99).max(1);
            children.push(FileNode {
                name: name_str.to_string(),
                path: rel_path(&path, root),
                lines: Some(line_count),
                max_len: Some(max_len),
                content: Some(content),
                children: None,
            });
        }
    }
    FileNode {
        name: dir_name(dir),
        path: rel_path(dir, root),
        lines: None,
        max_len: None,
        content: None,
        children: Some(children),
    }
}
/// Length of the line at the `pct`-th percentile (0-100), clamped to the
/// longest line; returns 0 for an empty slice.
fn percentile_line_len(lines: &[&str], pct: usize) -> usize {
    let mut lens: Vec<usize> = lines.iter().map(|l| l.len()).collect();
    if lens.is_empty() {
        return 0;
    }
    lens.sort_unstable();
    let last = lens.len() - 1;
    lens[(lens.len() * pct / 100).min(last)]
}
/// Final path component as a String; "." when there is none (e.g. "/")
/// or it is not valid UTF-8.
fn dir_name(dir: &Path) -> String {
    match dir.file_name().and_then(|n| n.to_str()) {
        Some(name) => name.to_string(),
        None => ".".to_string(),
    }
}
/// `path` relative to `root` as a String; "." for the root itself or when
/// `path` is not under `root`.
fn rel_path(path: &Path, root: &Path) -> String {
    let Ok(stripped) = path.strip_prefix(root) else {
        return ".".to_string();
    };
    let s = stripped.to_string_lossy();
    if s.is_empty() {
        ".".to_string()
    } else {
        s.into_owned()
    }
}