- nocmem Python service (mem/): FastAPI wrapper around NuoNuo's Hopfield-Hebbian memory, with /recall, /ingest, /store, /stats endpoints - NOC integration: auto recall after user message (injected as system msg), async ingest after LLM response (fire-and-forget) - Recall: cosine pre-filter (threshold 0.35) + Hopfield attention (β=32), top_k=3, KV-cache friendly (appended after user msg, not in system prompt) - Ingest: LLM extraction + paraphrase augmentation, heuristic fallback - Wired into main.rs, life.rs (agent done), http.rs (api chat) - Config: optional `nocmem.endpoint` in config.yaml - Includes benchmarks: LongMemEval (R@5=94.0%), efficiency, noise vs scale - Design doc: doc/nocmem.md
105 lines
4.1 KiB
Python
105 lines
4.1 KiB
Python
"""Test Hopfield attention sharpness with different top_k and beta.

Goal: find settings that give "either clearly remembered or nothing"
instead of flat attention across 20 candidates.
"""

import torch
|
||
from sentence_transformers import SentenceTransformer
|
||
from nuonuo.hippocampus import HippocampalMemory
|
||
|
||
# Prefer the GPU, but fall back to CPU so the script still runs on machines
# without CUDA instead of failing while loading the encoder.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Passed to HippocampalMemory as embed_dim; expected to match the size of the
# vectors produced by the encoder loaded below.
EMBED_DIM = 384

print("loading encoder...")
encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)

def emb(text: str) -> torch.Tensor:
    """Encode *text* with the module-level encoder and return its embedding.

    The encoder is called with a one-element batch, asked for a tensor on
    DEVICE with normalized embeddings, and the single resulting row is
    returned.
    """
    batch = encoder.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    return batch[0]


# The same memories are stored for every config so the results are comparable.
# Each entry is (cue text, target text).
MEMORIES = [
    ("bot的名字叫什么", "bot的名字叫小乖,是Fam给取的"),
    ("有哪些工具可以用", "工具有: fam_todo, send_file, spawn_agent, run_shell, run_python, update_memory"),
    ("vLLM在5090上的性能", "RTX 5090上vLLM跑gemma只有4.8 tok/s,需要切换到awq_marlin"),
    ("repo-vis项目是什么", "repo-vis用Rust后端+Three.js前端的3D代码库可视化,目标支持Linux内核和Pico VR"),
    ("repo-vis的性能瓶颈", "Linux内核79K文件,SQLite 1GB上限和O(n)反序列化是瓶颈,需要n-ary tree按需合并"),
    ("明天的待办事项", "最紧迫的是emblem scanner的AI Chat和KB部分"),
    ("后端切换到了什么", "NOC后端切换到了vLLM,速度变快了"),
    ("数据库密码在哪里", "数据库密码存在 /etc/secrets/db.env 文件中"),
    ("什么GPU", "服务器有NVIDIA RTX 4090 24GB VRAM"),
    ("home有多少log文件", "home目录及子目录下共有960个.log文件"),
]

# Each entry is (query text, short label used in the printed row,
# whether a stored memory is expected to be recalled for it).
QUERIES = [
    ("repo-vis怎么样了", "repo-vis", True),  # should recall clearly
    ("数据库密码", "密码", True),  # should recall clearly
    ("今天天气怎么样", "天气", False),  # irrelevant, should recall nothing
    ("vllm速度", "vllm", True),  # should recall clearly
    ("你喜欢吃什么", "吃什么", False),  # irrelevant
    ("VR支持", "VR", True),  # edge case
]

# Settings to compare. top_k is passed to HippocampalMemory as hopfield_top_k
# and beta as beta; label is the heading printed for that run.
CONFIGS = [
    # (top_k, beta, label)
    (20, 16.0, "baseline (top_k=20, β=16)"),
    (10, 16.0, "top_k=10, β=16"),
    (5, 16.0, "top_k=5, β=16"),
    (20, 32.0, "top_k=20, β=32"),
    (20, 64.0, "top_k=20, β=64"),
    (10, 32.0, "top_k=10, β=32"),
    (5, 32.0, "top_k=5, β=32"),
    (5, 64.0, "top_k=5, β=64"),
]

# Pre-embed everything once, outside the config loop, so every config is
# evaluated on the same vectors and the encoder is not re-run per config.
# mem_embs rows:   (cue embedding, target embedding, cue text, target text)
# query_embs rows: (query embedding, display label, expected-recall flag)
mem_embs = [(emb(cue), emb(target), cue, target) for cue, target in MEMORIES]
query_embs = [(emb(query), tag, relevant) for query, tag, relevant in QUERIES]

print(f"\n{len(MEMORIES)} memories, {len(QUERIES)} queries, {len(CONFIGS)} configs\n")

# Evaluate every config on a freshly built memory and print, per query, how
# sharply the recall result is concentrated on its top hit.
for top_k, beta, label in CONFIGS:
    print(f"{'='*70}")
    print(f" {label}")
    print(f"{'='*70}")

    # A new memory per config so settings never share stored state.
    hip = HippocampalMemory(
        embed_dim=EMBED_DIM, hopfield_top_k=top_k, beta=beta, device=DEVICE,
    )
    for ce, te, cue_text, target_text in mem_embs:
        hip.store(ce, te, metadata={"cue": cue_text, "target": target_text})

    # Running totals for the per-config summary. They are accumulated in the
    # same pass that prints the rows, so each query is recalled exactly once
    # and the averages are the averages of the printed values.
    total_gap = 0
    total_top1 = 0

    for qe, qlabel, should_recall in query_embs:
        results = hip.recall(qe, top_k=5)

        # show distribution
        sims = [r.similarity for r in results]
        top1 = sims[0] if sims else 0
        top2 = sims[1] if len(sims) > 1 else 0
        gap = top1 - top2  # gap between #1 and #2
        above_5pct = sum(1 for s in sims if s >= 0.05)
        above_10pct = sum(1 for s in sims if s >= 0.10)

        top_target = results[0].metadata["target"][:40] if results else "—"
        # The tag shows the expectation for this query, not a pass/fail verdict.
        tag = "✓" if should_recall else "✗"

        print(f" [{tag}] {qlabel:10s} top1={top1:.0%} top2={top2:.0%} gap={gap:.0%} "
              f"≥5%:{above_5pct} ≥10%:{above_10pct} → {top_target}")

        total_top1 += top1
        total_gap += gap

    # summary: average sharpness
    n = len(query_embs)
    print(f"\n avg top1={total_top1/n:.0%} avg gap={total_gap/n:.0%}")
    print()

    # Drop this config's memory before building the next one.
    del hip
    torch.cuda.empty_cache()