"""Import Claude Code conversation history into nocmem.

Scans ~/.claude/projects/ for JSONL conversation files, extracts
user-assistant turn pairs, and ingests them via /ingest API.

Usage:
    uv run python import_claude.py [--dry-run] [--limit N] [--project NAME]
"""

import argparse
import json
import os
import sys
import time
from pathlib import Path
from typing import Optional

try:
    import requests
except ImportError:
    # Only the network paths need the HTTP client: extract_turns() and
    # --dry-run keep working without it, and main() reports the problem
    # before the first request is made.
    requests = None

BASE = os.environ.get("NOCMEM_ENDPOINT", "http://127.0.0.1:9820")
CLAUDE_DIR = Path.home() / ".claude" / "projects"

# Messages shorter than this (after stripping) are dropped.
MIN_TEXT_CHARS = 10
# Assistant messages with more than this many ``` markers are dropped as
# tool/code-heavy noise.
MAX_CODE_FENCES = 10
# Each side of a turn is truncated to this many characters before ingestion.
MAX_TURN_CHARS = 500


def _message_text(obj) -> Optional[tuple[str, str]]:
    """Return (role, text) for one decoded JSONL record, or None to skip it."""
    # A line can be valid JSON without being an object (e.g. a list or number).
    if not isinstance(obj, dict):
        return None

    role = obj.get("type")
    if role not in ("user", "assistant"):
        return None

    # "message" may be missing, null, or (in malformed logs) not a dict.
    msg = obj.get("message") or {}
    if not isinstance(msg, dict):
        return None

    content = msg.get("content", "")
    if isinstance(content, str):
        text = content.strip()
    elif isinstance(content, list):
        # Keep only the text parts; other part types (tool calls, tool
        # results, ...) and text parts without a string payload are ignored.
        text = "\n".join(
            part["text"]
            for part in content
            if isinstance(part, dict)
            and part.get("type") == "text"
            and isinstance(part.get("text"), str)
        ).strip()
    else:
        return None

    if len(text) < MIN_TEXT_CHARS:
        return None
    # skip tool-heavy assistant responses (mostly noise)
    if role == "assistant" and text.count("```") > MAX_CODE_FENCES:
        return None
    return role, text


def _pair_turns(messages: list[tuple[str, str]]) -> list[tuple[str, str]]:
    """Pair each user message with the next assistant message after it."""
    turns = []
    total = len(messages)
    i = 0
    while i < total - 1:
        role, user_text = messages[i]
        if role != "user":
            i += 1
            continue
        # find next assistant; user messages in between are skipped
        j = i + 1
        while j < total and messages[j][0] != "assistant":
            j += 1
        if j < total:
            # truncate long messages
            turns.append((user_text[:MAX_TURN_CHARS], messages[j][1][:MAX_TURN_CHARS]))
        # Resume after the assistant reply (or stop if none was found).
        i = j + 1
    return turns


def extract_turns(jsonl_path: Path) -> list[tuple[str, str]]:
    """Extract (user_msg, assistant_msg) pairs from a JSONL conversation.

    Lines that are not valid JSON, records that are not user/assistant
    messages, and messages without usable text are skipped. Each side of a
    returned pair is truncated to MAX_TURN_CHARS characters.

    Args:
        jsonl_path: Path of the conversation file, one JSON record per line.

    Returns:
        The list of (user_text, assistant_text) pairs in file order.
    """
    messages = []  # (role, text)
    # Decode explicitly as UTF-8 rather than the locale default; undecodable
    # bytes are replaced so one bad line cannot abort the whole file.
    with open(jsonl_path, encoding="utf-8", errors="replace") as f:
        for line in f:
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            entry = _message_text(obj)
            if entry is not None:
                messages.append(entry)
    return _pair_turns(messages)


def ingest_turn(user_msg: str, assistant_msg: str) -> int:
    """Send a turn to nocmem /ingest, return number of memories stored.

    Best-effort: any failure (network error, non-200 status, unexpected
    response body) is reported on stderr and counted as 0 stored memories.
    """
    try:
        r = requests.post(
            f"{BASE}/ingest",
            json={"user_msg": user_msg, "assistant_msg": assistant_msg},
            timeout=120,
        )
        if r.status_code == 200:
            return r.json().get("stored", 0)
        print(f" error: HTTP {r.status_code} from {BASE}/ingest", file=sys.stderr)
    except Exception as e:
        # Deliberately broad: a single failed turn must not stop the import.
        print(f" error: {e}", file=sys.stderr)
    return 0


def _find_conversations(project_filter: str) -> list[tuple[str, Path]]:
    """Return (project_dir_name, jsonl_path) for every conversation file."""
    conversations = []
    for project_dir in sorted(CLAUDE_DIR.iterdir()):
        if not project_dir.is_dir():
            continue
        if project_filter and project_filter not in project_dir.name:
            continue
        for jsonl in sorted(project_dir.glob("*.jsonl")):
            if "subagents" in str(jsonl):
                continue
            conversations.append((project_dir.name, jsonl))
    return conversations


def _memory_count(timeout: float = 3) -> int:
    """Return "num_memories" from the server's /stats endpoint."""
    r = requests.get(f"{BASE}/stats", timeout=timeout)
    r.raise_for_status()
    return r.json()["num_memories"]


def main():
    """Parse CLI arguments, collect turns, and ingest them (or preview them)."""
    parser = argparse.ArgumentParser(description="Import Claude Code history into nocmem")
    parser.add_argument("--dry-run", action="store_true", help="just show what would be imported")
    parser.add_argument("--limit", type=int, default=0, help="max turns to ingest (0=all)")
    parser.add_argument("--project", type=str, default="", help="filter by project dir name substring")
    args = parser.parse_args()

    if not CLAUDE_DIR.is_dir():
        print(f"ERROR: {CLAUDE_DIR} does not exist or is not a directory", file=sys.stderr)
        sys.exit(1)

    # find all conversation files
    conversations = _find_conversations(args.project)
    print(f"found {len(conversations)} conversations in {CLAUDE_DIR}")
    if args.project:
        print(f" filtered by: {args.project}")

    # extract all turns
    all_turns = []
    for project_name, jsonl_path in conversations:
        all_turns.extend((project_name, u, a) for u, a in extract_turns(jsonl_path))
    print(f"extracted {len(all_turns)} turns total\n")

    # Only a positive limit truncates; a negative value would otherwise
    # silently drop turns from the end via slicing.
    if args.limit > 0:
        all_turns = all_turns[:args.limit]

    if args.dry_run:
        for project, user_msg, asst_msg in all_turns[:20]:
            print(f" [{project[:30]}]")
            print(f" U: {user_msg[:80]}")
            print(f" A: {asst_msg[:80]}")
            print()
        if len(all_turns) > 20:
            print(f" ... and {len(all_turns) - 20} more")
        return

    if requests is None:
        print("ERROR: the 'requests' package is required to ingest turns", file=sys.stderr)
        sys.exit(1)

    # check server
    try:
        before = _memory_count()
    except Exception:
        print(f"ERROR: nocmem not reachable at {BASE}", file=sys.stderr)
        sys.exit(1)
    print(f"nocmem: {before} memories before import\n")

    # ingest
    total_stored = 0
    t0 = time.monotonic()
    for i, (_project, user_msg, asst_msg) in enumerate(all_turns):
        total_stored += ingest_turn(user_msg, asst_msg)
        done = i + 1
        if done % 10 == 0:
            elapsed = time.monotonic() - t0
            rate = done / elapsed if elapsed > 0 else 0.0
            eta = (len(all_turns) - done) / rate if rate > 0 else 0
            print(f" [{done}/{len(all_turns)}] stored={total_stored} ({rate:.1f} turns/s, ETA {eta:.0f}s)")
    elapsed = time.monotonic() - t0

    # final stats; a failure here must not hide the summary of a finished import
    try:
        after = _memory_count(timeout=10)
    except Exception as e:
        print(f" error: could not read final stats: {e}", file=sys.stderr)
        after = "?"

    print(f"\n{'='*50}")
    print(f"imported {total_stored} memories from {len(all_turns)} turns")
    print(f"nocmem: {before} → {after} memories")
    print(f"time: {elapsed:.1f}s")


if __name__ == "__main__":
    main()