add nocmem: auto memory recall + ingest via NuoNuo hippocampal network

- nocmem Python service (mem/): FastAPI wrapper around NuoNuo's
  Hopfield-Hebbian memory, exposing /recall, /ingest, /store, /stats endpoints
  (an example client call is sketched after this list)
- NOC integration: auto recall after user message (injected as system msg),
  async ingest after LLM response (fire-and-forget)
- Recall: cosine pre-filter (threshold 0.35) + Hopfield attention (β=32),
  top_k=3, KV-cache friendly (appended after user msg, not in system prompt);
  the scoring and the message injection are sketched after this list
- Ingest: LLM extraction + paraphrase augmentation, heuristic fallback
- Wired into main.rs, life.rs (agent done), http.rs (api chat)
- Config: optional `nocmem.endpoint` in config.yaml
- Includes benchmarks: LongMemEval (R@5=94.0%), efficiency, noise vs scale
- Design doc: doc/nocmem.md
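
A minimal sketch of the two-stage recall scoring described above (cosine pre-filter at 0.35, then a sharp Hopfield softmax with β=32, top 3 kept), assuming normalized embeddings in a plain tensor store. The real path goes through NuoNuo's HippocampalMemory and the FastAPI service; function and variable names here are illustrative only.

```python
import torch

COSINE_THRESHOLD = 0.35  # cosine pre-filter threshold from the commit message
BETA = 32.0              # Hopfield attention inverse temperature
TOP_K = 3                # memories injected after the user message

def recall_scores(query_emb: torch.Tensor, cue_embs: torch.Tensor):
    """Two-stage recall sketch: cosine pre-filter, then Hopfield-style softmax.

    query_emb: (D,) normalized query embedding
    cue_embs:  (N, D) normalized cue embeddings of stored memories
    Returns (indices, weights) for up to TOP_K memories; empty tensors when
    nothing clears the pre-filter ("clearly remembered or nothing").
    """
    sims = cue_embs @ query_emb                         # cosine similarity on unit vectors
    keep = torch.nonzero(sims >= COSINE_THRESHOLD).squeeze(-1)
    if keep.numel() == 0:
        return keep, sims[keep]                         # recall nothing
    weights = torch.softmax(BETA * sims[keep], dim=0)   # sharp attention over survivors
    top = torch.topk(weights, k=min(TOP_K, keep.numel()))
    return keep[top.indices], top.values
```

A large β concentrates the weight on one or two memories, which is the "clearly remembered or nothing" behaviour the parameter sweep in the test file below measures.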
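And a sketch of the KV-cache-friendly injection: recalled memories are appended as a system message after the user message rather than edited into the system prompt, so the cached prefix stays byte-identical. The message shapes are illustrative; the real wiring lives in main.rs, life.rs and http.rs.

```python
def build_messages(system_prompt, history, user_msg, recalled):
    """Keep the system prompt and history unchanged so the KV-cache prefix is reused."""
    messages = [
        {"role": "system", "content": system_prompt},
        *history,
        {"role": "user", "content": user_msg},
    ]
    if recalled:  # only add a message when something was actually recalled
        messages.append({
            "role": "system",
            "content": "Relevant memories:\n" + "\n".join(recalled),
        })
    return messages
```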
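Finally, a hedged example of calling the service over HTTP. Only the endpoint paths come from the commit message; the request/response field names, the port value for `nocmem.endpoint`, and the blocking calls are assumptions (in NOC the ingest call is asynchronous, fire-and-forget). See doc/nocmem.md for the real schema.

```python
import requests

NOCMEM = "http://127.0.0.1:8900"  # hypothetical value for `nocmem.endpoint` in config.yaml

def recall(user_msg: str, top_k: int = 3) -> dict:
    """Fetch memories relevant to the latest user message."""
    # field names are illustrative; see doc/nocmem.md for the actual schema
    resp = requests.post(f"{NOCMEM}/recall", json={"text": user_msg, "top_k": top_k}, timeout=5)
    resp.raise_for_status()
    return resp.json()

def ingest(user_msg: str, assistant_msg: str) -> None:
    """Submit the latest exchange for extraction and storage (NOC does this asynchronously)."""
    requests.post(f"{NOCMEM}/ingest",
                  json={"user": user_msg, "assistant": assistant_msg}, timeout=5)
```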
Author: Fam Zheng
Date: 2026-04-11 12:24:48 +01:00
parent 688387dac3
commit 7000ccda0f
17 changed files with 4164 additions and 3 deletions


@@ -0,0 +1,104 @@
"""Test Hopfield attention sharpness with different top_k and beta.
Goal: find settings that give "either clearly remembered or nothing"
instead of flat attention across 20 candidates.
"""
import torch
from sentence_transformers import SentenceTransformer
from nuonuo.hippocampus import HippocampalMemory
DEVICE = "cuda"
EMBED_DIM = 384
print("loading encoder...")
encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
def emb(text):
    return encoder.encode([text], convert_to_tensor=True, normalize_embeddings=True, device=DEVICE)[0]
# (cue, target) memory pairs, stored identically in every config; the test data is in Chinese
MEMORIES = [
    ("bot的名字叫什么", "bot的名字叫小乖是Fam给取的"),
    ("有哪些工具可以用", "工具有: fam_todo, send_file, spawn_agent, run_shell, run_python, update_memory"),
    ("vLLM在5090上的性能", "RTX 5090上vLLM跑gemma只有4.8 tok/s需要切换到awq_marlin"),
    ("repo-vis项目是什么", "repo-vis用Rust后端+Three.js前端的3D代码库可视化目标支持Linux内核和Pico VR"),
    ("repo-vis的性能瓶颈", "Linux内核79K文件SQLite 1GB上限和O(n)反序列化是瓶颈需要n-ary tree按需合并"),
    ("明天的待办事项", "最紧迫的是emblem scanner的AI Chat和KB部分"),
    ("后端切换到了什么", "NOC后端切换到了vLLM速度变快了"),
    ("数据库密码在哪里", "数据库密码存在 /etc/secrets/db.env 文件中"),
    ("什么GPU", "服务器有NVIDIA RTX 4090 24GB VRAM"),
    ("home有多少log文件", "home目录及子目录下共有960个.log文件"),
]
QUERIES = [
    ("repo-vis怎么样了", "repo-vis", True),  # should recall clearly
    ("数据库密码", "密码", True),  # should recall clearly
    ("今天天气怎么样", "天气", False),  # irrelevant, should recall nothing
    ("vllm速度", "vllm", True),  # should recall clearly
    ("你喜欢吃什么", "吃什么", False),  # irrelevant
    ("VR支持", "VR", True),  # edge case
]
CONFIGS = [
    # (top_k, beta, label)
    (20, 16.0, "baseline (top_k=20, β=16)"),
    (10, 16.0, "top_k=10, β=16"),
    (5, 16.0, "top_k=5, β=16"),
    (20, 32.0, "top_k=20, β=32"),
    (20, 64.0, "top_k=20, β=64"),
    (10, 32.0, "top_k=10, β=32"),
    (5, 32.0, "top_k=5, β=32"),
    (5, 64.0, "top_k=5, β=64"),
]
# pre-embed everything
mem_embs = [(emb(c), emb(t), c, t) for c, t in MEMORIES]
query_embs = [(emb(q), label, relevant) for q, label, relevant in QUERIES]
print(f"\n{len(MEMORIES)} memories, {len(QUERIES)} queries, {len(CONFIGS)} configs\n")
for top_k, beta, label in CONFIGS:
    print(f"{'='*70}")
    print(f" {label}")
    print(f"{'='*70}")
    hip = HippocampalMemory(
        embed_dim=EMBED_DIM, hopfield_top_k=top_k, beta=beta, device=DEVICE,
    )
    for ce, te, cue_text, target_text in mem_embs:
        hip.store(ce, te, metadata={"cue": cue_text, "target": target_text})
    for qe, qlabel, should_recall in query_embs:
        results = hip.recall(qe, top_k=5)
        # show distribution
        sims = [r.similarity for r in results]
        top1 = sims[0] if sims else 0
        top2 = sims[1] if len(sims) > 1 else 0
        gap = top1 - top2  # gap between #1 and #2
        above_5pct = sum(1 for s in sims if s >= 0.05)
        above_10pct = sum(1 for s in sims if s >= 0.10)
        top_target = results[0].metadata["target"][:40] if results else ""
        tag = "✓" if should_recall else "✗"  # expected to recall vs irrelevant query
        print(f" [{tag}] {qlabel:10s} top1={top1:.0%} top2={top2:.0%} gap={gap:.0%} "
              f"≥5%:{above_5pct} ≥10%:{above_10pct} → {top_target}")
    # summary: average sharpness
    total_gap = 0
    total_top1 = 0
    for qe, qlabel, _ in query_embs:
        results = hip.recall(qe, top_k=5)
        sims = [r.similarity for r in results]
        total_top1 += sims[0] if sims else 0
        total_gap += (sims[0] - sims[1]) if len(sims) > 1 else 0
    n = len(query_embs)
    print(f"\n avg top1={total_top1/n:.0%} avg gap={total_gap/n:.0%}")
    print()
    del hip
    torch.cuda.empty_cache()