Files
nuonuo/experiments/exp04c_optimal_config.py
Fam Zheng d923aa1e31 NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs.
Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025).

Architecture:
- Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle)
- Multi-hop: Hebbian W matrix with WTA pattern separation
- 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency
- 4ms latency @ 20K memories, ~1GB VRAM

Key findings:
- Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian)
- WTA pattern separation enables 20K+ capacity
- Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this
- MiniLM-L6 is optimal (discrimination gap > absolute similarity)
- Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark
- SNN encoder viable (CosSim 0.99) but not needed for current architecture
2026-04-07 10:37:24 +01:00

229 lines
8.2 KiB
Python

"""Experiment 4c: Find optimal config for real-world use.
From exp04b: k=50 gives 95% paraphrase recall (best).
Need to verify capacity is still sufficient at k=50.
Also: test with more realistic memory counts (100-1000).
"""
import sys
import time
import json
from pathlib import Path
import torch
import torch.nn as nn
import numpy as np
DEVICE = "cuda"
RESULTS_DIR = Path(__file__).parent.parent / "doc"
def cosine(a, b):
    """Return the cosine similarity of two 1-D tensors as a Python float.

    Zero-norm inputs are treated specially: if either vector has zero
    magnitude the similarity is defined as 0.0 (avoids NaN from 0/0).
    """
    if a.norm() == 0 or b.norm() == 0:
        return 0.0
    # dim=0 computes the similarity directly over the single vector axis,
    # equivalent to unsqueezing both tensors to shape (1, d).
    sim = nn.functional.cosine_similarity(a, b, dim=0)
    return sim.item()
def winner_take_all(x, k):
    """Sparsify x along its last dimension into a k-hot binary code.

    The k largest entries (per slice of the last dim) become 1.0 and all
    other positions become 0.0. Shape is preserved.
    """
    top_indices = x.topk(k, dim=-1).indices
    mask = torch.zeros_like(x)
    mask.scatter_(-1, top_indices, 1.0)
    return mask
class UnifiedHebbianMemory:
    """Hebbian hetero-associative memory with WTA pattern separation.

    Embeddings are projected through a fixed random matrix and sparsified
    into k-hot codes; cue->target associations are stored as a running sum
    of outer products in W (one-shot Hebbian learning). Recall projects the
    query, multiplies through W, and re-sparsifies the result.
    """

    def __init__(self, input_dim, code_dim, k, device=None):
        """
        Args:
            input_dim: dimensionality of the input embeddings.
            code_dim: dimensionality of the sparse internal code space.
            k: number of active units kept by winner-take-all.
            device: torch device for all buffers. Defaults to the
                module-level DEVICE constant (backward compatible with the
                previous hard-coded behavior).
        """
        if device is None:
            device = DEVICE
        self.k = k
        self.code_dim = code_dim
        # Fixed random projection, scaled by 1/sqrt(input_dim) so the
        # pre-activation variance is independent of the embedding size.
        self.proj = (torch.randn(input_dim, code_dim, device=device)
                     * (1.0 / input_dim**0.5))
        # Hebbian weight matrix mapping cue codes -> target codes.
        self.W = torch.zeros(code_dim, code_dim, device=device)

    def sep(self, x):
        """Project x into code space and keep the top-k units (k-hot code)."""
        return winner_take_all(x @ self.proj, self.k)

    def learn(self, cue_emb, target_emb):
        """One-shot Hebbian update: W += outer(target_code, cue_code)."""
        self.W += torch.outer(self.sep(target_emb), self.sep(cue_emb))

    def recall(self, query_emb):
        """Retrieve the stored target code associated with query_emb."""
        code = self.sep(query_emb)
        raw = self.W @ code
        return winner_take_all(raw, self.k)
def test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000):
    """Generate lots of diverse sentence pairs and test recall.

    Builds up to ``max_memories`` cue->target pairs from a topic/action/fact
    grid, stores them one by one, and at fixed memory-count checkpoints
    prints recall accuracy on a random sample (<=100 pairs) together with
    the Frobenius norm of W.

    Args:
        model: sentence encoder exposing ``.encode()`` and
            ``.get_sentence_embedding_dimension()`` (SentenceTransformer API);
            passed in by the caller, so no import is needed here.
        code_dim: sparse code dimensionality of the memory.
        k: winner-take-all sparsity of the codes.
        max_memories: total number of generated pairs to store.
    """
    # Generate diverse sentences programmatically
    topics = [
        "deploy", "database", "API", "testing", "monitoring", "security",
        "frontend", "backend", "caching", "logging", "backup", "server",
        "CI/CD", "Docker", "Kubernetes", "microservice", "authentication",
        "performance", "debugging", "refactoring"
    ]
    actions = [
        "is broken", "needs updating", "has a bug", "was configured wrong",
        "needs optimization", "requires migration", "should be refactored",
        "has a memory leak", "is timing out", "needs documentation"
    ]
    facts = [
        "was fixed last week by adding an index",
        "uses the new v3 API endpoint",
        "is scheduled for maintenance on Friday",
        "requires admin access to modify",
        "has a known issue with large payloads",
        "was migrated from AWS to GCP",
        "needs Python 3.12 or higher",
        "uses Redis for session storage",
        "has rate limiting at 1000 req/min",
        "is monitored by PagerDuty"
    ]
    cue_sentences = []
    target_sentences = []
    for i in range(max_memories):
        # Cycle through the grids so consecutive pairs differ in topic;
        # the running issue/ticket number #{i} keeps every pair unique.
        topic = topics[i % len(topics)]
        action = actions[i % len(actions)]
        fact = facts[i % len(facts)]
        idx = i // (len(topics) * len(actions))
        cue_sentences.append(f"The {topic} system {action} (issue #{i})")
        target_sentences.append(f"{topic} {fact}, ticket #{i}, priority {idx}")

    embed_dim = model.get_sentence_embedding_dimension()
    mem = UnifiedHebbianMemory(embed_dim, code_dim, k)

    # Encode in batches to bound encoder memory use.
    batch_size = 256
    checkpoints = [50, 100, 200, 500, 1000, 2000]
    all_cue_embs = []
    all_target_embs = []
    print(f" Config: code_dim={code_dim}, k={k}")
    for start in range(0, max_memories, batch_size):
        end = min(start + batch_size, max_memories)
        cue_embs = model.encode(cue_sentences[start:end],
                                convert_to_tensor=True,
                                normalize_embeddings=True, device=DEVICE)
        target_embs = model.encode(target_sentences[start:end],
                                   convert_to_tensor=True,
                                   normalize_embeddings=True, device=DEVICE)
        for i in range(cue_embs.shape[0]):
            mem.learn(cue_embs[i], target_embs[i])
            all_cue_embs.append(cue_embs[i])
            all_target_embs.append(target_embs[i])
            total = len(all_cue_embs)
            # Checkpoint check is per-item so every count in `checkpoints`
            # is hit exactly once, regardless of batch size.
            if total in checkpoints:
                # Test on random sample
                sample_n = min(100, total)
                indices = torch.randperm(total)[:sample_n].tolist()
                correct = 0
                for idx in indices:
                    recalled = mem.recall(all_cue_embs[idx])
                    target_code = mem.sep(all_target_embs[idx])
                    # Codes are k-hot; >0.5 cosine means a clear majority of
                    # active units match.
                    if cosine(recalled, target_code) > 0.5:
                        correct += 1
                w_norm = mem.W.norm().item()
                print(f" N={total:>5}: Recall={correct}/{sample_n} "
                      f"({correct/sample_n:.0%}), W_norm={w_norm:.0f}")
def test_paraphrase_at_scale(model, code_dim, k, n_memories):
    """Add many memories, then test paraphrase recall on a subset.

    Fills the memory with ``n_memories`` synthetic background pairs as
    interference, then stores five hand-written cue->target pairs and
    measures recall twice: from the exact stored cue, and from an unseen
    paraphrase of each cue.

    Returns:
        (exact_accuracy, paraphrase_accuracy) as floats in [0, 1].
    """
    dim = model.get_sentence_embedding_dimension()
    memory = UnifiedHebbianMemory(dim, code_dim, k)

    # Background memories act as noise competing with the test pairs.
    bg_cues = [f"Background task number {i} about topic {i%20}" for i in range(n_memories)]
    bg_targets = [f"Background fact {i} with detail {i%10}" for i in range(n_memories)]
    bg_cue_embs = model.encode(bg_cues, convert_to_tensor=True,
                               normalize_embeddings=True, device=DEVICE,
                               batch_size=256)
    bg_target_embs = model.encode(bg_targets, convert_to_tensor=True,
                                  normalize_embeddings=True, device=DEVICE,
                                  batch_size=256)
    for cue_emb, target_emb in zip(bg_cue_embs, bg_target_embs):
        memory.learn(cue_emb, target_emb)

    # The specific memories under test.
    test_pairs = [
        ("What's the weather like today?", "User prefers to check weather every morning"),
        ("Let's deploy the new version", "The deployment pipeline uses GitHub Actions with k3s"),
        ("The database is slow again", "Missing index on users table caused slowdown last time"),
        ("I need to fix the auth bug", "Auth service uses JWT tokens with 24h expiry in Redis"),
        ("The API returns 500 errors", "Last 500 was caused by OOM in the Python worker"),
    ]
    # paraphrases[i] is an unseen rewording of test_pairs[i][0].
    paraphrases = [
        "How's the weather outside?",
        "We should push the new release",
        "DB performance is terrible",
        "There's a login bug to fix",
        "Getting internal server errors",
    ]
    cue_texts = [pair[0] for pair in test_pairs]
    target_texts = [pair[1] for pair in test_pairs]
    test_cue_embs = model.encode(cue_texts,
                                 convert_to_tensor=True,
                                 normalize_embeddings=True, device=DEVICE)
    test_target_embs = model.encode(target_texts,
                                    convert_to_tensor=True,
                                    normalize_embeddings=True, device=DEVICE)
    para_embs = model.encode(paraphrases, convert_to_tensor=True,
                             normalize_embeddings=True, device=DEVICE)
    for cue_emb, target_emb in zip(test_cue_embs, test_target_embs):
        memory.learn(cue_emb, target_emb)

    def count_hits(query_embs):
        # A hit: the recalled code matches the stored target code
        # (>0.5 cosine between the two k-hot vectors).
        return sum(
            1
            for query, target in zip(query_embs, test_target_embs)
            if cosine(memory.recall(query), memory.sep(target)) > 0.5
        )

    exact_correct = count_hits(test_cue_embs)  # recall from the original cue
    para_correct = count_hits(para_embs)       # recall from the paraphrase
    n = len(test_pairs)
    print(f" bg={n_memories}, code={code_dim}, k={k}: "
          f"Exact={exact_correct}/{n}, Para={para_correct}/{n}")
    return exact_correct / n, para_correct / n
def main():
    """Run the capacity and paraphrase-recall sweeps over candidate configs."""
    banner = "=" * 60
    print(banner)
    print("Experiment 4c: Optimal Config + Scale Testing")
    print(banner)
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)

    # Test 1: capacity with real embeddings across (code_dim, k) configs.
    print("\n=== Capacity Test ===")
    capacity_configs = [(8192, 50), (16384, 50), (16384, 20), (32768, 50)]
    for code_dim, k in capacity_configs:
        test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000)
        print()

    # Test 2: paraphrase recall under increasing background interference.
    print("\n=== Paraphrase Recall at Scale ===")
    for n_bg in (0, 100, 500, 1000):
        for code_dim, k in ((8192, 50), (16384, 50)):
            test_paraphrase_at_scale(model, code_dim, k, n_bg)


if __name__ == "__main__":
    main()