Files
noc/mem/benchmarks/sharpness_test.py
Fam Zheng 7000ccda0f add nocmem: auto memory recall + ingest via NuoNuo hippocampal network
- nocmem Python service (mem/): FastAPI wrapper around NuoNuo's
  Hopfield-Hebbian memory, with /recall, /ingest, /store, /stats endpoints
- NOC integration: auto recall after user message (injected as system msg),
  async ingest after LLM response (fire-and-forget)
- Recall: cosine pre-filter (threshold 0.35) + Hopfield attention (β=32),
  top_k=3, KV-cache friendly (appended after user msg, not in system prompt)
- Ingest: LLM extraction + paraphrase augmentation, heuristic fallback
- Wired into main.rs, life.rs (agent done), http.rs (api chat)
- Config: optional `nocmem.endpoint` in config.yaml
- Includes benchmarks: LongMemEval (R@5=94.0%), efficiency, noise vs scale
- Design doc: doc/nocmem.md
2026-04-11 12:24:48 +01:00

105 lines
4.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Test Hopfield attention sharpness with different top_k and beta.
Goal: find settings that give "either clearly remembered or nothing"
instead of flat attention across 20 candidates.
"""
import torch
from sentence_transformers import SentenceTransformer
from nuonuo.hippocampus import HippocampalMemory
# Device string handed to both the sentence encoder and HippocampalMemory.
DEVICE = "cuda"
# Passed to HippocampalMemory as embed_dim; presumably must equal the encoder's
# output width -- confirm if the model below is ever changed.
EMBED_DIM = 384
print("loading encoder...")
# Sentence encoder shared by every emb() call in this script.
encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
def emb(text):
    """Encode a single string with the module-level sentence encoder.

    The text is wrapped in a one-item batch and the first (only) row of the
    result is returned. The encoder is asked for tensor output with
    normalized embeddings on ``DEVICE``.
    """
    batch = encoder.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    return batch[0]
# Fixture memories, stored identically for every configuration.
# Each entry is a (cue, target) pair: both strings are embedded, and both are
# kept as metadata on the stored memory.
MEMORIES = [
    ("bot的名字叫什么", "bot的名字叫小乖是Fam给取的"),
    ("有哪些工具可以用", "工具有: fam_todo, send_file, spawn_agent, run_shell, run_python, update_memory"),
    ("vLLM在5090上的性能", "RTX 5090上vLLM跑gemma只有4.8 tok/s需要切换到awq_marlin"),
    ("repo-vis项目是什么", "repo-vis用Rust后端+Three.js前端的3D代码库可视化目标支持Linux内核和Pico VR"),
    ("repo-vis的性能瓶颈", "Linux内核79K文件SQLite 1GB上限和O(n)反序列化是瓶颈需要n-ary tree按需合并"),
    ("明天的待办事项", "最紧迫的是emblem scanner的AI Chat和KB部分"),
    ("后端切换到了什么", "NOC后端切换到了vLLM速度变快了"),
    ("数据库密码在哪里", "数据库密码存在 /etc/secrets/db.env 文件中"),
    ("什么GPU", "服务器有NVIDIA RTX 4090 24GB VRAM"),
    ("home有多少log文件", "home目录及子目录下共有960个.log文件"),
]
# Probe queries as (query text, short label used in the printed row,
# whether a stored memory is expected to be recalled for it).
QUERIES = [
    # Expected to hit a stored memory clearly.
    ("repo-vis怎么样了", "repo-vis", True),
    ("数据库密码", "密码", True),
    # Unrelated to anything stored; ideally nothing is recalled.
    ("今天天气怎么样", "天气", False),
    # Expected to hit a stored memory clearly.
    ("vllm速度", "vllm", True),
    # Unrelated to anything stored.
    ("你喜欢吃什么", "吃什么", False),
    # Edge case: "VR" only appears inside one longer target string.
    ("VR支持", "VR", True),
]
# Parameter grid to compare. Each entry is (top_k, beta, label): top_k and
# beta are passed to HippocampalMemory as hopfield_top_k / beta, and label is
# the heading printed above that configuration's results.
CONFIGS = [
    # Vary top_k at beta=16.
    (20, 16.0, "baseline (top_k=20, β=16)"),
    (10, 16.0, "top_k=10, β=16"),
    (5, 16.0, "top_k=5, β=16"),
    # Vary beta at top_k=20.
    (20, 32.0, "top_k=20, β=32"),
    (20, 64.0, "top_k=20, β=64"),
    # Smaller top_k combined with larger beta.
    (10, 32.0, "top_k=10, β=32"),
    (5, 32.0, "top_k=5, β=32"),
    (5, 64.0, "top_k=5, β=64"),
]
# Embed all cues, targets and queries once up front so every configuration
# below reuses the same vectors instead of re-encoding the text.
mem_embs = [
    (emb(cue), emb(target), cue, target)
    for cue, target in MEMORIES
]
query_embs = [
    (emb(question), short_label, expected)
    for question, short_label, expected in QUERIES
]
print(f"\n{len(MEMORIES)} memories, {len(QUERIES)} queries, {len(CONFIGS)} configs\n")
# Sweep every (top_k, beta) configuration over the same stored memories and
# print, per query, how concentrated the recall scores are, followed by the
# per-configuration averages.
for top_k, beta, label in CONFIGS:
    print("=" * 70)
    print(f" {label}")
    print("=" * 70)

    # A fresh memory per configuration so settings never share stored state.
    hip = HippocampalMemory(
        embed_dim=EMBED_DIM, hopfield_top_k=top_k, beta=beta, device=DEVICE,
    )
    for ce, te, cue_text, target_text in mem_embs:
        hip.store(ce, te, metadata={"cue": cue_text, "target": target_text})

    # Running totals for the summary line. They are accumulated during the
    # single recall pass below, so the averages describe exactly the rows that
    # were printed and each query is recalled only once.
    total_top1 = 0
    total_gap = 0

    for qe, qlabel, should_recall in query_embs:
        results = hip.recall(qe, top_k=5)

        # Score distribution over the returned results (assumed to be ordered
        # best-first by recall() -- TODO confirm against HippocampalMemory).
        sims = [r.similarity for r in results]
        top1 = sims[0] if sims else 0
        top2 = sims[1] if len(sims) > 1 else 0
        gap = top1 - top2  # gap between #1 and #2
        above_5pct = sum(1 for s in sims if s >= 0.05)
        above_10pct = sum(1 for s in sims if s >= 0.10)
        top_target = results[0].metadata["target"][:40] if results else ""

        # Marks the *expectation* for this query (a memory should / should not
        # be recalled), not whether the recall actually succeeded.
        tag = "✓" if should_recall else "✗"
        print(f" [{tag}] {qlabel:10s} top1={top1:.0%} top2={top2:.0%} gap={gap:.0%} "
              f"≥5%:{above_5pct} ≥10%:{above_10pct} → {top_target}")

        total_top1 += top1
        # The summary only counts a gap when a runner-up exists; with a single
        # result the printed row shows gap == top1 but the average adds 0.
        total_gap += gap if len(sims) > 1 else 0

    # summary: average sharpness
    n = len(query_embs)
    print(f"\n avg top1={total_top1/n:.0%} avg gap={total_gap/n:.0%}")
    print()

    # Drop this configuration's memory and ask torch to release cached CUDA
    # memory before the next configuration is built.
    del hip
    torch.cuda.empty_cache()