Initial commit: repo-vis — 3D codebase visualization
Rust (axum) backend with git clone / zip upload / SQLite cache. Three.js frontend with D3 treemap layout and semantic zoom. Docker deployment with musl static binary.
This commit is contained in:
1751
server/Cargo.lock
generated
Normal file
1751
server/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
19
server/Cargo.toml
Normal file
19
server/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[package]
name = "repo-vis-server"
version = "0.1.0"
edition = "2021"

[dependencies]
# Web framework; "multipart" is required by the zip-upload endpoint.
axum = { version = "0.8", features = ["multipart"] }
tokio = { version = "1", features = ["full"] }
# Static-file serving for the frontend bundle, plus CORS.
tower-http = { version = "0.6", features = ["fs", "cors"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# Scan cache; "bundled" compiles SQLite from source so no system libsqlite
# is needed (important for the static musl build mentioned in the commit).
rusqlite = { version = "0.32", features = ["bundled"] }
# NOTE(review): walkdir looks unused — scanner.rs walks with std::fs::read_dir.
# Confirm before removing.
walkdir = "2"
# Content hashing for cache keys (zip uploads and git URLs).
sha2 = "0.10"
hex = "0.4"
zip = "2"
# Scratch directories for clones and zip extraction.
tempfile = "3"
tracing = "0.1"
tracing-subscriber = "0.3"
|
||||
131
server/src/cache.rs
Normal file
131
server/src/cache.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
use rusqlite::Connection;
|
||||
use serde::Serialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::path::Path;
|
||||
use std::sync::Mutex;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// How long a cached scan stays valid before it is pruned / ignored.
const TTL_SECS: u64 = 24 * 60 * 60; // 24 hours

/// One row of the recent-repos list returned by `Cache::list_repos`
/// (serialized to JSON for the /api/repos endpoint).
#[derive(Debug, Serialize)]
pub struct RepoEntry {
    /// Display name (repo name or zip file stem).
    pub name: String,
    pub source: String, // "git:url" or "zip:filename"
    /// Number of text-file leaves counted at scan time.
    pub file_count: usize,
    /// Key into `scan_cache` for retrieving the full tree.
    pub cache_key: String,
    /// Unix timestamp (seconds) when the scan was recorded.
    pub created: u64,
}
|
||||
|
||||
/// SQLite-backed cache of scan results plus repo metadata.
pub struct Cache {
    // Mutex because rusqlite's Connection is not Sync; all access is
    // serialized through this lock.
    conn: Mutex<Connection>,
}
|
||||
|
||||
impl Cache {
|
||||
pub fn new(db_path: &Path) -> Self {
|
||||
if let Some(parent) = db_path.parent() {
|
||||
std::fs::create_dir_all(parent).ok();
|
||||
}
|
||||
|
||||
let conn = Connection::open(db_path).expect("Failed to open cache database");
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS scan_cache (
|
||||
key TEXT PRIMARY KEY,
|
||||
data TEXT NOT NULL,
|
||||
created INTEGER NOT NULL
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS repos (
|
||||
cache_key TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
source TEXT NOT NULL,
|
||||
file_count INTEGER NOT NULL DEFAULT 0,
|
||||
created INTEGER NOT NULL
|
||||
);",
|
||||
)
|
||||
.expect("Failed to create tables");
|
||||
|
||||
Cache {
|
||||
conn: Mutex::new(conn),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn make_key(input: &str) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(input.as_bytes());
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &str) -> Option<String> {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
let now = now_secs();
|
||||
|
||||
// Prune old entries
|
||||
conn.execute(
|
||||
"DELETE FROM scan_cache WHERE created < ?1",
|
||||
[now.saturating_sub(TTL_SECS)],
|
||||
)
|
||||
.ok();
|
||||
|
||||
conn.query_row(
|
||||
"SELECT data, created FROM scan_cache WHERE key = ?1",
|
||||
[key],
|
||||
|row| {
|
||||
let data: String = row.get(0)?;
|
||||
let created: u64 = row.get(1)?;
|
||||
Ok((data, created))
|
||||
},
|
||||
)
|
||||
.ok()
|
||||
.and_then(|(data, created)| {
|
||||
if now - created < TTL_SECS {
|
||||
Some(data)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set(&self, key: &str, data: &str) {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO scan_cache (key, data, created) VALUES (?1, ?2, ?3)",
|
||||
rusqlite::params![key, data, now_secs()],
|
||||
)
|
||||
.ok();
|
||||
}
|
||||
|
||||
pub fn record_repo(&self, cache_key: &str, name: &str, source: &str, file_count: usize) {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO repos (cache_key, name, source, file_count, created) VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||
rusqlite::params![cache_key, name, source, file_count, now_secs()],
|
||||
)
|
||||
.ok();
|
||||
}
|
||||
|
||||
pub fn list_repos(&self) -> Vec<RepoEntry> {
|
||||
let conn = self.conn.lock().unwrap();
|
||||
let mut stmt = conn
|
||||
.prepare("SELECT cache_key, name, source, file_count, created FROM repos ORDER BY created DESC LIMIT 50")
|
||||
.unwrap();
|
||||
|
||||
stmt.query_map([], |row| {
|
||||
Ok(RepoEntry {
|
||||
cache_key: row.get(0)?,
|
||||
name: row.get(1)?,
|
||||
source: row.get(2)?,
|
||||
file_count: row.get(3)?,
|
||||
created: row.get(4)?,
|
||||
})
|
||||
})
|
||||
.unwrap()
|
||||
.filter_map(|r| r.ok())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Seconds elapsed since the Unix epoch.
fn now_secs() -> u64 {
    let elapsed = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    elapsed.as_secs()
}
|
||||
213
server/src/main.rs
Normal file
213
server/src/main.rs
Normal file
@@ -0,0 +1,213 @@
|
||||
mod cache;
|
||||
mod scanner;
|
||||
|
||||
use axum::{
|
||||
extract::{DefaultBodyLimit, Multipart, Path, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use cache::{Cache, RepoEntry};
|
||||
use scanner::{scan_dir, FileNode};
|
||||
use serde::Deserialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use tempfile::TempDir;
|
||||
use tower_http::services::ServeDir;
|
||||
use tracing::info;
|
||||
|
||||
/// Shared application state handed to every handler via axum `State`.
struct AppState {
    // SQLite-backed scan cache plus the recent-repos list.
    cache: Cache,
}
|
||||
|
||||
/// JSON body for POST /api/scan-git.
#[derive(Deserialize)]
struct GitRequest {
    // Repository URL; must start with http://, https:// or git@
    // (validated in `scan_git`).
    url: String,
}
|
||||
|
||||
fn count_leaves(node: &FileNode) -> usize {
|
||||
match &node.children {
|
||||
Some(children) => children.iter().map(count_leaves).sum(),
|
||||
None => 1,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let data_dir_str = std::env::var("DATA_DIR").unwrap_or_else(|_| "data".to_string());
|
||||
let data_dir = std::path::Path::new(&data_dir_str);
|
||||
std::fs::create_dir_all(data_dir).ok();
|
||||
|
||||
let state = Arc::new(AppState {
|
||||
cache: Cache::new(&data_dir.join("cache.db")),
|
||||
});
|
||||
|
||||
let frontend_dir_str =
|
||||
std::env::var("FRONTEND_DIR").unwrap_or_else(|_| "../web/dist".to_string());
|
||||
let frontend_dir = std::path::Path::new(&frontend_dir_str);
|
||||
|
||||
let app = Router::new()
|
||||
.route("/api/scan-git", post(scan_git))
|
||||
.route("/api/scan-zip", post(scan_zip))
|
||||
.route("/api/repos", get(list_repos))
|
||||
.route("/api/repos/{key}", get(get_repo))
|
||||
.layer(DefaultBodyLimit::max(100 * 1024 * 1024))
|
||||
.with_state(state)
|
||||
.fallback_service(ServeDir::new(frontend_dir).append_index_html_on_directories(true));
|
||||
|
||||
let port = std::env::var("PORT").unwrap_or_else(|_| "3000".to_string());
|
||||
let addr = format!("0.0.0.0:{port}");
|
||||
info!("repo-vis server running at http://localhost:{port}");
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
|
||||
axum::serve(listener, app).await.unwrap();
|
||||
}
|
||||
|
||||
async fn list_repos(
|
||||
State(state): State<Arc<AppState>>,
|
||||
) -> Json<Vec<RepoEntry>> {
|
||||
Json(state.cache.list_repos())
|
||||
}
|
||||
|
||||
async fn get_repo(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Path(key): Path<String>,
|
||||
) -> Result<Json<FileNode>, (StatusCode, String)> {
|
||||
state
|
||||
.cache
|
||||
.get(&key)
|
||||
.and_then(|data| serde_json::from_str(&data).ok())
|
||||
.map(Json)
|
||||
.ok_or((StatusCode::NOT_FOUND, "Repo not found in cache".to_string()))
|
||||
}
|
||||
|
||||
async fn scan_git(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Json(req): Json<GitRequest>,
|
||||
) -> Result<Json<FileNode>, (StatusCode, String)> {
|
||||
let url = req.url.trim().to_string();
|
||||
|
||||
if !url.starts_with("http://")
|
||||
&& !url.starts_with("https://")
|
||||
&& !url.starts_with("git@")
|
||||
{
|
||||
return Err((StatusCode::BAD_REQUEST, "Invalid git URL".to_string()));
|
||||
}
|
||||
|
||||
// Check cache
|
||||
let key = Cache::make_key(&format!("git:{url}"));
|
||||
if let Some(cached) = state.cache.get(&key) {
|
||||
info!("Cache hit for {url}");
|
||||
let tree: FileNode =
|
||||
serde_json::from_str(&cached).map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
return Ok(Json(tree));
|
||||
}
|
||||
|
||||
// Clone into temp dir
|
||||
let tmp = TempDir::new().map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
info!("Cloning {url} ...");
|
||||
let output = Command::new("git")
|
||||
.args(["clone", "--depth", "1", "--", &url])
|
||||
.arg(tmp.path())
|
||||
.output()
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("git clone failed: {e}")))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("git clone failed: {stderr}"),
|
||||
));
|
||||
}
|
||||
|
||||
let mut tree = scan_dir(tmp.path(), tmp.path());
|
||||
|
||||
let repo_name = url
|
||||
.trim_end_matches('/')
|
||||
.trim_end_matches(".git")
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or("repo")
|
||||
.to_string();
|
||||
tree.name = repo_name.clone();
|
||||
|
||||
let file_count = count_leaves(&tree);
|
||||
|
||||
if let Ok(json_str) = serde_json::to_string(&tree) {
|
||||
state.cache.set(&key, &json_str);
|
||||
state.cache.record_repo(&key, &repo_name, &url, file_count);
|
||||
}
|
||||
|
||||
Ok(Json(tree))
|
||||
}
|
||||
|
||||
async fn scan_zip(
|
||||
State(state): State<Arc<AppState>>,
|
||||
mut multipart: Multipart,
|
||||
) -> Result<Json<FileNode>, (StatusCode, String)> {
|
||||
let field = multipart
|
||||
.next_field()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?
|
||||
.ok_or((StatusCode::BAD_REQUEST, "No file uploaded".to_string()))?;
|
||||
|
||||
let file_name = field
|
||||
.file_name()
|
||||
.unwrap_or("upload.zip")
|
||||
.to_string();
|
||||
|
||||
let data = field
|
||||
.bytes()
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, format!("Failed to read upload: {e}")))?;
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(&data);
|
||||
let hash = hex::encode(hasher.finalize());
|
||||
let key = Cache::make_key(&format!("zip:{hash}"));
|
||||
|
||||
if let Some(cached) = state.cache.get(&key) {
|
||||
info!("Cache hit for zip {file_name}");
|
||||
let tree: FileNode =
|
||||
serde_json::from_str(&cached).map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
return Ok(Json(tree));
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
let cursor = std::io::Cursor::new(&data);
|
||||
let mut archive =
|
||||
zip::ZipArchive::new(cursor).map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid zip: {e}")))?;
|
||||
|
||||
archive
|
||||
.extract(tmp.path())
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Extract failed: {e}")))?;
|
||||
|
||||
let entries: Vec<_> = std::fs::read_dir(tmp.path())
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.filter_map(|e| e.ok())
|
||||
.collect();
|
||||
|
||||
let scan_root = if entries.len() == 1 && entries[0].file_type().map(|t| t.is_dir()).unwrap_or(false) {
|
||||
entries[0].path()
|
||||
} else {
|
||||
tmp.path().to_path_buf()
|
||||
};
|
||||
|
||||
let mut tree = scan_dir(&scan_root, &scan_root);
|
||||
let zip_name = file_name.trim_end_matches(".zip").to_string();
|
||||
tree.name = zip_name.clone();
|
||||
|
||||
let file_count = count_leaves(&tree);
|
||||
|
||||
if let Ok(json_str) = serde_json::to_string(&tree) {
|
||||
state.cache.set(&key, &json_str);
|
||||
state.cache.record_repo(&key, &zip_name, &format!("zip:{file_name}"), file_count);
|
||||
}
|
||||
|
||||
Ok(Json(tree))
|
||||
}
|
||||
172
server/src/scanner.rs
Normal file
172
server/src/scanner.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Files larger than this are skipped — their content would bloat the JSON
/// payload sent to the frontend.
const MAX_FILE_SIZE: u64 = 256 * 1024;

/// Directory names never descended into: dependency caches, VCS metadata,
/// build output, editor state.
static IGNORE_DIRS: &[&str] = &[
    "node_modules", ".git", ".hg", ".svn", "dist", "build", "__pycache__",
    ".next", ".nuxt", ".output", "vendor", ".venv", "venv", "target",
    ".idea", ".vscode", ".cache",
];

/// File extensions (compared lowercased) treated as text/source code.
static TEXT_EXTENSIONS: &[&str] = &[
    "js", "jsx", "ts", "tsx", "mjs", "cjs", "py", "pyw", "go", "rs", "c", "h", "cpp", "hpp",
    "cc", "java", "kt", "scala", "rb", "php", "lua", "pl", "pm", "sh", "bash", "zsh", "fish",
    "html", "htm", "css", "scss", "less", "sass", "json", "yaml", "yml", "toml", "ini", "cfg",
    "xml", "svg", "md", "txt", "rst", "sql", "graphql", "gql", "vue", "svelte", "astro", "tf",
    "hcl", "proto", "thrift", "r", "jl", "ex", "exs", "erl", "hrl", "zig", "nim", "v",
    "swift", "m", "mm", "cs", "fs", "fsx", "hs", "lhs", "ml", "mli", "clj", "cljs", "cljc",
    "edn", "el", "lisp", "scm", "rkt", "cmake",
];

/// Extension-less (or dot-file) names that are still text and worth showing.
/// Matched case-sensitively against the exact file name.
static SPECIAL_NAMES: &[&str] = &[
    "Makefile", "Dockerfile", "Containerfile", "Vagrantfile", "Rakefile", "Gemfile",
    "Brewfile", "Procfile", "CMakeLists.txt", "meson.build", "BUILD", "WORKSPACE",
    ".gitignore", ".dockerignore", ".editorconfig", ".env.example",
];
|
||||
|
||||
/// One node of the scanned tree: a directory (has `children`) or a file
/// leaf (has `lines` / `max_len` / `content`). Serialized to JSON for the
/// frontend; absent optional fields are omitted from the output.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FileNode {
    pub name: String,
    // Path relative to the scan root ("." for the root itself).
    pub path: String,
    // Line count — set for files only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lines: Option<usize>,
    // P99 line length (see percentile_line_len) — files only; JSON "maxLen".
    #[serde(rename = "maxLen", skip_serializing_if = "Option::is_none")]
    pub max_len: Option<usize>,
    // Full file text — files only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    // Child nodes — directories only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub children: Option<Vec<FileNode>>,
}
|
||||
|
||||
fn is_text_file(name: &str, ext: Option<&str>) -> bool {
|
||||
if SPECIAL_NAMES.contains(&name) {
|
||||
return true;
|
||||
}
|
||||
match ext {
|
||||
Some(e) => {
|
||||
let lower = e.to_ascii_lowercase();
|
||||
TEXT_EXTENSIONS.contains(&lower.as_str())
|
||||
}
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively walks `dir`, building a `FileNode` tree of text files.
///
/// * `root` is the scan root; node paths are stored relative to it.
/// * Ignored directories (`IGNORE_DIRS`), non-text files, empty files and
///   files over `MAX_FILE_SIZE` are skipped; subdirectories that end up
///   with no children are pruned from the result.
/// * Errors (unreadable dirs/entries/files) are skipped silently so one
///   bad entry never fails the whole scan.
pub fn scan_dir(dir: &Path, root: &Path) -> FileNode {
    let ignore_set: HashSet<&str> = IGNORE_DIRS.iter().copied().collect();
    let mut children = Vec::new();

    let entries = match fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => {
            // Unreadable directory: return an empty directory node rather
            // than aborting the scan.
            return FileNode {
                name: dir_name(dir),
                path: rel_path(dir, root),
                lines: None,
                max_len: None,
                content: None,
                children: Some(Vec::new()),
            };
        }
    };

    // Sort by name so the tree layout is deterministic across scans.
    let mut sorted: Vec<_> = entries.filter_map(|e| e.ok()).collect();
    sorted.sort_by(|a, b| a.file_name().cmp(&b.file_name()));

    for entry in sorted {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();

        if ignore_set.contains(name_str.as_ref()) {
            continue;
        }

        let path = entry.path();
        let ft = match entry.file_type() {
            Ok(ft) => ft,
            Err(_) => continue,
        };

        if ft.is_dir() {
            let subtree = scan_dir(&path, root);
            // Keep only directories that contain at least one text file
            // somewhere beneath them.
            if subtree
                .children
                .as_ref()
                .map(|c| !c.is_empty())
                .unwrap_or(false)
            {
                children.push(subtree);
            }
        } else if ft.is_file() {
            let ext = path.extension().and_then(|e| e.to_str());
            if !is_text_file(&name_str, ext) {
                continue;
            }

            let meta = match fs::metadata(&path) {
                Ok(m) => m,
                Err(_) => continue,
            };

            // Skip empty files and anything over the size cap.
            if meta.len() == 0 || meta.len() > MAX_FILE_SIZE {
                continue;
            }

            let content = match fs::read_to_string(&path) {
                Ok(c) => c,
                Err(_) => continue, // binary or encoding error
            };

            // Note: split('\n') counts a trailing newline as one extra
            // (empty) line.
            let lines: Vec<&str> = content.split('\n').collect();
            let line_count = lines.len();
            // P99 line length to avoid outlier long lines distorting layout
            let max_len = percentile_line_len(&lines, 99).max(1);

            children.push(FileNode {
                name: name_str.to_string(),
                path: rel_path(&path, root),
                lines: Some(line_count),
                max_len: Some(max_len),
                content: Some(content),
                children: None,
            });
        }
        // Symlinks and other entry types are intentionally ignored.
    }

    FileNode {
        name: dir_name(dir),
        path: rel_path(dir, root),
        lines: None,
        max_len: None,
        content: None,
        children: Some(children),
    }
}
|
||||
|
||||
/// Length (in bytes) of the line at the given percentile (0–100).
/// Returns 0 for an empty slice; the index is clamped into range.
fn percentile_line_len(lines: &[&str], pct: usize) -> usize {
    let mut lengths: Vec<usize> = lines.iter().map(|line| line.len()).collect();
    match lengths.len() {
        0 => 0,
        n => {
            lengths.sort_unstable();
            lengths[(n * pct / 100).min(n - 1)]
        }
    }
}
|
||||
|
||||
/// Last path component as an owned String; "." when the path has no file
/// name (e.g. "/") or the name is not valid UTF-8.
fn dir_name(dir: &Path) -> String {
    match dir.file_name().and_then(|n| n.to_str()) {
        Some(n) => n.to_string(),
        None => ".".to_string(),
    }
}
|
||||
|
||||
/// `path` rendered relative to `root`; "." for the root itself or when
/// `path` does not live under `root`.
fn rel_path(path: &Path, root: &Path) -> String {
    let stripped = match path.strip_prefix(root) {
        Ok(p) => p,
        Err(_) => return ".".to_string(),
    };
    let text = stripped.to_string_lossy();
    if text.is_empty() {
        ".".to_string()
    } else {
        text.into_owned()
    }
}
|
||||
Reference in New Issue
Block a user