From 35cafbd4cae7fd1dbe825530d0aca7dcb0d0eb44 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Sat, 11 Apr 2026 22:57:17 +0100 Subject: [PATCH] nocmem: qa-style extraction prompt, multi-cue variants, claude history importer - Switch extraction prompt to qa-style (80% recall vs 60% baseline) - Semicolon-separated cues in extraction become paraphrase variants - Add import_claude.py to bulk-import Claude Code conversation history - Fix LLM model name in systemd service, add logging basicConfig --- mem/benchmarks/prompt_experiment.py | 217 ++++++++++++++++++++++++++++ mem/import_claude.py | 178 +++++++++++++++++++++++ mem/nocmem.service | 4 +- mem/server.py | 38 +++-- 4 files changed, 423 insertions(+), 14 deletions(-) create mode 100644 mem/benchmarks/prompt_experiment.py create mode 100644 mem/import_claude.py diff --git a/mem/benchmarks/prompt_experiment.py b/mem/benchmarks/prompt_experiment.py new file mode 100644 index 0000000..d88c8f3 --- /dev/null +++ b/mem/benchmarks/prompt_experiment.py @@ -0,0 +1,217 @@ +"""Test different extraction prompts for ingest quality. + +Takes a few real conversation turns, tries different prompts, +compares what gets extracted, then tests recall against target queries. +""" + +import json +import time +import requests +import torch +from sentence_transformers import SentenceTransformer +from nuonuo.hippocampus import HippocampalMemory +from openai import OpenAI + +DEVICE = "cuda" +LLM = OpenAI(base_url="http://100.84.7.49:8000/v1", api_key="unused") +MODEL = "gemma4-31b" +encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE) + +# real conversation turns from Claude Code history +TURNS = [ + ("装好了tailscale。你帮忙生成吧,用新的bot名叫yoyo", + "好,Tailscale IP `100.116.213.25`,yoyo 用户创建成功。继续搞 token 和部署。"), + ("能来一个org共享的gitea ci cd runner 不,就跑在这台机器上就行。", + "搞起来!装 act_runner,注册到 euphon org,以后每个仓库直接加 `.gitea/workflows/` 就能跑 CI 了。"), + ("能不能给heradev一个bridge的nic,secondary的,这样就有192.168.1.0/24的地址可以用了", + "饭团,你是要给 heradev 这个 KVM 虚拟机加一个桥接网卡对吧?这样它就能直接拿到物理网段 192.168.1.0/24 的 IP。"), + ("i7 bot也不好,直接叫@ms吧", + "好,@ms 改好了。现在来研究全局 webhook 的问题——之前系统级 webhook 没触发,让我深入看看。"), + ("AI/ML 工具克隆 全部删掉吧,emblemcv,visint也删掉", + "好嘞,这一波删掉能省 ~62G。动手了。"), +] + +# queries we expect to recall +TARGET_QUERIES = [ + ("yoyo是什么", ["yoyo"]), + ("act runner怎么装的", ["act_runner", "runner"]), + ("heradev网络怎么配的", ["heradev", "桥接", "bridge", "192.168"]), + ("gitea bot叫什么", ["ms", "@ms", "麻薯"]), + ("清理了哪些项目", ["emblemcv", "visint", "62G", "删"]), +] + +# different extraction prompts to test +PROMPTS = { + "baseline": """From this conversation turn, extract key facts worth remembering for future conversations. +For each fact, provide a "cue" (what would trigger recalling this) and a "target" (the fact itself). +Rate importance 0-1 (1 = critical fact, 0 = trivial). + +User: {user} +Assistant: {assistant} + +Output format (one per line): +CUE: | TARGET: | IMPORTANCE: <0-1> + +Only extract genuinely useful facts. If nothing worth remembering, output NONE.""", + + "entity_focused": """从这段对话中提取值得记住的事实。重点关注: +- 名称、代号、别名(谁叫什么) +- 配置、参数、端口、地址 +- 做了什么操作、改了什么 +- 决策和原因 + +每条事实用以下格式输出(每行一条): +CUE: <用什么问题能想起这件事> | TARGET: <事实本身,要具体> | IMPORTANCE: <0-1> + +User: {user} +Assistant: {assistant} + +如果没有值得记住的,输出 NONE。""", + + "multi_cue": """从这段对话中提取值得长期记住的事实。 + +要求: +1. 每条事实提供 2-3 个不同的触发短语(cue),用分号分隔 +2. target 要具体、独立可理解(不依赖上下文) +3. 包含所有出现的名称、代号、配置值 + +格式(每行一条): +CUE: <触发短语1>; <触发短语2>; <触发短语3> | TARGET: <具体事实> | IMPORTANCE: <0-1> + +User: {user} +Assistant: {assistant} + +没有值得记住的则输出 NONE。""", + + "qa_style": """你是一个记忆提取器。把这段对话变成若干个"问答对"——未来有人问这个问题时,能直接给出答案。 + +要求: +- 问题要自然,像人真的会这么问 +- 答案要具体完整,包含关键细节(名称、数字、地址等) +- 同一个事实可以从不同角度提问 + +格式(每行一条): +CUE: <自然的提问方式> | TARGET: <完整的回答> | IMPORTANCE: <0-1> + +User: {user} +Assistant: {assistant} + +没有值得记住的则输出 NONE。""", +} + +import re + +def extract_with_prompt(prompt_template, user_msg, asst_msg): + prompt = prompt_template.format(user=user_msg, assistant=asst_msg) + try: + resp = LLM.chat.completions.create( + model=MODEL, + messages=[{"role": "user", "content": prompt}], + temperature=0.3, max_tokens=512, + ) + result = resp.choices[0].message.content + except Exception as e: + return [] + + memories = [] + for line in result.strip().split("\n"): + if line.strip() == "NONE": + break + m = re.match(r"CUE:\s*(.+?)\s*\|\s*TARGET:\s*(.+?)\s*\|\s*IMPORTANCE:\s*([\d.]+)", line) + if m: + memories.append({ + "cue": m.group(1).strip(), + "target": m.group(2).strip(), + "importance": float(m.group(3)), + }) + return memories + + +def emb(text): + return encoder.encode([text], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0] + + +def test_recall(memories_list, queries): + """Build a memory from extracted memories and test recall.""" + hip = HippocampalMemory(embed_dim=384, beta=32.0, hopfield_top_k=10, device=DEVICE) + + for mem in memories_list: + cue_text = mem["cue"] + target_text = mem["target"] + cue_emb = emb(cue_text) + target_emb = emb(target_text) + + # handle multi-cue (semicolon separated) + variants = [] + if ";" in cue_text: + parts = [p.strip() for p in cue_text.split(";") if p.strip()] + if len(parts) > 1: + cue_emb = emb(parts[0]) + variants = [emb(p) for p in parts[1:]] + + hip.store(cue_emb, target_emb, cue_variants=variants if variants else None, + metadata={"cue": cue_text, "target": target_text}) + + hits = 0 + for query, keywords in queries: + qe = emb(query) + results = hip.recall(qe, top_k=3) + recalled_text = " ".join(r.metadata["target"] for r in results) + hit = any(kw.lower() in recalled_text.lower() for kw in keywords) + if hit: + hits += 1 + + return hits, len(queries) + + +def main(): + print("extraction prompt experiment\n") + print(f"turns: {len(TURNS)}, queries: {len(TARGET_QUERIES)}\n") + + for name, template in PROMPTS.items(): + print(f"{'='*60}") + print(f" prompt: {name}") + print(f"{'='*60}") + + all_memories = [] + for user_msg, asst_msg in TURNS: + mems = extract_with_prompt(template, user_msg, asst_msg) + all_memories.extend(mems) + for m in mems: + print(f" [{m['importance']:.1f}] CUE: {m['cue'][:50]}") + print(f" TGT: {m['target'][:60]}") + + print(f"\n extracted: {len(all_memories)} memories") + + hits, total = test_recall(all_memories, TARGET_QUERIES) + print(f" recall: {hits}/{total} ({hits/total*100:.0f}%)") + + # show per-query results + hip = HippocampalMemory(embed_dim=384, beta=32.0, hopfield_top_k=10, device=DEVICE) + for mem in all_memories: + cue_text = mem["cue"] + cue_emb = emb(cue_text.split(";")[0].strip() if ";" in cue_text else cue_text) + target_emb = emb(mem["target"]) + variants = [] + if ";" in cue_text: + parts = [p.strip() for p in cue_text.split(";") if p.strip()] + variants = [emb(p) for p in parts[1:]] if len(parts) > 1 else [] + hip.store(cue_emb, target_emb, cue_variants=variants or None, + metadata={"cue": cue_text, "target": mem["target"]}) + + for query, keywords in TARGET_QUERIES: + qe = emb(query) + results = hip.recall(qe, top_k=1) + if results: + target = results[0].metadata["target"][:60] + hit = any(kw.lower() in results[0].metadata["target"].lower() for kw in keywords) + mark = "✓" if hit else "✗" + print(f" {mark} {query:20s} → {target}") + else: + print(f" ✗ {query:20s} → (empty)") + + print() + + +if __name__ == "__main__": + main() diff --git a/mem/import_claude.py b/mem/import_claude.py new file mode 100644 index 0000000..88db808 --- /dev/null +++ b/mem/import_claude.py @@ -0,0 +1,178 @@ +"""Import Claude Code conversation history into nocmem. + +Scans ~/.claude/projects/ for JSONL conversation files, +extracts user-assistant turn pairs, and ingests them via /ingest API. + +Usage: + uv run python import_claude.py [--dry-run] [--limit N] +""" + +import argparse +import json +import os +import sys +import time +from pathlib import Path + +import requests + +BASE = os.environ.get("NOCMEM_ENDPOINT", "http://127.0.0.1:9820") +CLAUDE_DIR = Path.home() / ".claude" / "projects" + + +def extract_turns(jsonl_path: Path) -> list[tuple[str, str]]: + """Extract (user_msg, assistant_msg) pairs from a JSONL conversation.""" + messages = [] # (role, text) + + with open(jsonl_path) as f: + for line in f: + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + + msg_type = obj.get("type") + if msg_type not in ("user", "assistant"): + continue + + msg = obj.get("message", {}) + content = msg.get("content", "") + + # extract text from content + if isinstance(content, str): + text = content.strip() + elif isinstance(content, list): + parts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + parts.append(part["text"]) + text = "\n".join(parts).strip() + else: + continue + + if not text or len(text) < 10: + continue + + # skip tool-heavy assistant responses (mostly noise) + if msg_type == "assistant" and text.count("```") > 10: + continue + + role = "user" if msg_type == "user" else "assistant" + messages.append((role, text)) + + # pair up user-assistant turns + turns = [] + i = 0 + while i < len(messages) - 1: + if messages[i][0] == "user": + # find next assistant + j = i + 1 + while j < len(messages) and messages[j][0] != "assistant": + j += 1 + if j < len(messages): + user_text = messages[i][1][:500] # truncate long messages + asst_text = messages[j][1][:500] + turns.append((user_text, asst_text)) + i = j + 1 + else: + i += 1 + + return turns + + +def ingest_turn(user_msg: str, assistant_msg: str) -> int: + """Send a turn to nocmem /ingest, return number of memories stored.""" + try: + r = requests.post( + f"{BASE}/ingest", + json={"user_msg": user_msg, "assistant_msg": assistant_msg}, + timeout=120, + ) + if r.status_code == 200: + return r.json().get("stored", 0) + except Exception as e: + print(f" error: {e}", file=sys.stderr) + return 0 + + +def main(): + parser = argparse.ArgumentParser(description="Import Claude Code history into nocmem") + parser.add_argument("--dry-run", action="store_true", help="just show what would be imported") + parser.add_argument("--limit", type=int, default=0, help="max turns to ingest (0=all)") + parser.add_argument("--project", type=str, default="", help="filter by project dir name substring") + args = parser.parse_args() + + # find all conversation files + conversations = [] + for project_dir in sorted(CLAUDE_DIR.iterdir()): + if not project_dir.is_dir(): + continue + if args.project and args.project not in project_dir.name: + continue + for jsonl in sorted(project_dir.glob("*.jsonl")): + if "subagents" in str(jsonl): + continue + conversations.append((project_dir.name, jsonl)) + + print(f"found {len(conversations)} conversations in {CLAUDE_DIR}") + if args.project: + print(f" filtered by: {args.project}") + + # extract all turns + all_turns = [] + for project_name, jsonl_path in conversations: + turns = extract_turns(jsonl_path) + if turns: + all_turns.extend([(project_name, u, a) for u, a in turns]) + + print(f"extracted {len(all_turns)} turns total\n") + + if args.limit: + all_turns = all_turns[:args.limit] + + if args.dry_run: + for project, user_msg, asst_msg in all_turns[:20]: + print(f" [{project[:30]}]") + print(f" U: {user_msg[:80]}") + print(f" A: {asst_msg[:80]}") + print() + if len(all_turns) > 20: + print(f" ... and {len(all_turns) - 20} more") + return + + # check server + try: + r = requests.get(f"{BASE}/stats", timeout=3) + r.raise_for_status() + before = r.json()["num_memories"] + print(f"nocmem: {before} memories before import\n") + except Exception: + print(f"ERROR: nocmem not reachable at {BASE}") + sys.exit(1) + + # ingest + total_stored = 0 + t0 = time.monotonic() + for i, (project, user_msg, asst_msg) in enumerate(all_turns): + stored = ingest_turn(user_msg, asst_msg) + total_stored += stored + if (i + 1) % 10 == 0: + elapsed = time.monotonic() - t0 + rate = (i + 1) / elapsed + eta = (len(all_turns) - i - 1) / rate if rate > 0 else 0 + print(f" [{i+1}/{len(all_turns)}] stored={total_stored} ({rate:.1f} turns/s, ETA {eta:.0f}s)") + + elapsed = time.monotonic() - t0 + + # final stats + r = requests.get(f"{BASE}/stats") + after = r.json()["num_memories"] + + print(f"\n{'='*50}") + print(f"imported {total_stored} memories from {len(all_turns)} turns") + print(f"nocmem: {before} → {after} memories") + print(f"time: {elapsed:.1f}s") + + +if __name__ == "__main__": + main() diff --git a/mem/nocmem.service b/mem/nocmem.service index 6238a17..ad9be8e 100644 --- a/mem/nocmem.service +++ b/mem/nocmem.service @@ -5,12 +5,12 @@ After=network.target [Service] Type=simple WorkingDirectory=/data/src/noc/mem -ExecStart=/home/fam/.local/bin/uv run uvicorn server:app --host 0.0.0.0 --port 9820 +ExecStart=/home/fam/.local/bin/uv run uvicorn server:app --host 0.0.0.0 --port 9820 --log-level info Restart=on-failure RestartSec=5 Environment=NOCMEM_LLM_ENDPOINT=http://100.84.7.49:8000/v1 -Environment=NOCMEM_LLM_MODEL=QuantTrio/gemma-4-31B-it-AWQ +Environment=NOCMEM_LLM_MODEL=gemma4-31b Environment=NOCMEM_LLM_API_KEY=unused Environment=NOCMEM_DATA_DIR=/data/src/noc/mem/data Environment=NOCMEM_DEVICE=cuda diff --git a/mem/server.py b/mem/server.py index 827f22f..8796f08 100644 --- a/mem/server.py +++ b/mem/server.py @@ -21,6 +21,7 @@ from openai import OpenAI from nuonuo.hippocampus import HippocampalMemory +logging.basicConfig(level=logging.INFO) logger = logging.getLogger("nocmem") # ── config ────────────────────────────────────────────────────────── @@ -212,12 +213,16 @@ class ExtractedMemory: def _extract_memories_llm(user_msg: str, assistant_msg: str) -> list[ExtractedMemory]: prompt = ( - "From this conversation turn, extract key facts worth remembering for future conversations.\n" - "For each fact, provide a \"cue\" (what would trigger recalling this) and a \"target\" (the fact itself).\n" - "Rate importance 0-1 (1 = critical fact, 0 = trivial).\n\n" + '你是一个记忆提取器。把这段对话变成若干个"问答对"——未来有人问这个问题时,能直接给出答案。\n\n' + "要求:\n" + "- 问题要自然,像人真的会这么问\n" + "- 答案要具体完整,包含关键细节(名称、数字、地址等)\n" + "- 同一个事实可以从不同角度提问\n" + "- 每条 CUE 提供 2-3 个不同的触发短语,用分号分隔\n\n" + "格式(每行一条):\n" + "CUE: <提问方式1>; <提问方式2>; <提问方式3> | TARGET: <完整的回答> | IMPORTANCE: <0-1>\n\n" f"User: {user_msg}\nAssistant: {assistant_msg}\n\n" - "Output format (one per line):\nCUE: | TARGET: | IMPORTANCE: <0-1>\n\n" - "Only extract genuinely useful facts. If nothing worth remembering, output NONE." + "没有值得记住的则输出 NONE。" ) try: resp = llm_client.chat.completions.create( @@ -330,19 +335,28 @@ def _extract_and_store(user_msg: str, assistant_msg: str) -> int: if mem.importance < 0.3: continue - cue_emb = embed(mem.cue) + # split semicolon-separated cues into primary + variants + cue_parts = [p.strip() for p in mem.cue.split(";") if p.strip()] + primary_cue = cue_parts[0] if cue_parts else mem.cue + inline_variants = cue_parts[1:] if len(cue_parts) > 1 else [] + + cue_emb = embed(primary_cue) target_emb = embed(mem.target) - if llm_client: - paraphrases = _generate_paraphrases_llm(mem.cue, n=3) - else: - paraphrases = _generate_paraphrases_heuristic(mem.cue, n=3) + # inline variants from semicolon cues (already in the extraction) + variant_embs = embed_batch(inline_variants) if inline_variants else [] - variant_embs = embed_batch(paraphrases) if paraphrases else [] + # additionally generate paraphrases if no inline variants + if not inline_variants: + if llm_client: + paraphrases = _generate_paraphrases_llm(primary_cue, n=3) + else: + paraphrases = _generate_paraphrases_heuristic(primary_cue, n=3) + variant_embs = embed_batch(paraphrases) if paraphrases else [] hippocampus.store( cue_emb, target_emb, - cue_variants=variant_embs, + cue_variants=variant_embs if variant_embs else None, metadata={"cue": mem.cue, "target": mem.target, "importance": mem.importance}, timestamp=time.time(), )