NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs. Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025). Architecture: - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle) - Multi-hop: Hebbian W matrix with WTA pattern separation - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency - 4ms latency @ 20K memories, ~1GB VRAM Key findings: - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian) - WTA pattern separation enables 20K+ capacity - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this - MiniLM-L6 is optimal (discrimination gap > absolute similarity) - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark - SNN encoder viable (CosSim 0.99) but not needed for current architecture
This commit is contained in:
237
experiments/exp11_scale_ceiling.py
Normal file
237
experiments/exp11_scale_ceiling.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""Experiment P3: Breaking the 20K 80% ceiling.
|
||||
|
||||
Hypothesis: NN pre-filter (top-20) misses the correct cue at large scale.
|
||||
|
||||
Tests:
|
||||
1. Oracle analysis: is the correct cue in top-K? What K is needed?
|
||||
2. Hierarchical memory: cluster memories, route query to relevant cluster
|
||||
3. Re-ranking: top-K NN → cross-similarity re-rank → Hopfield on re-ranked
|
||||
4. Multiple projections: ensemble of NN lookups with different random projections
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
|
||||
# Prefer the GPU but fall back to CPU so the script also runs on
# machines without CUDA (the original hard-coded "cuda" and crashed there).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
# Ten (cue, target) memory pairs. Index i here must line up with
# PARAPHRASES[i] below — the tests score a retrieval as correct when the
# winning memory id equals the paraphrase index.
PAIRS = [
    ("What's the weather like today?", "User checks weather every morning"),
    ("Let's deploy the new version", "Deployment uses GitHub Actions with k3s"),
    ("The database is slow again", "Missing index on users table"),
    ("I need to fix the authentication bug", "JWT tokens with 24h expiry in Redis"),
    ("The API returns 500 errors", "OOM in the Python worker"),
    ("Let's set up monitoring", "Prometheus + Grafana on OCI"),
    ("Tests failing in CI", "CI needs postgres service container"),
    ("Memory usage too high", "Leak in websocket handler"),
    ("Help with Docker setup", "docker-compose for dev, k3s for prod"),
    ("Log files too large", "Logs rotate daily, shipped to Loki"),
]

# One paraphrased query per pair, in the same order as PAIRS.
# These are the retrieval cues used by every test below.
PARAPHRASES = [
    "How's the weather?", "Ship the release", "DB performance terrible",
    "Fix the login issue", "Server errors everywhere", "Need observability",
    "CI tests breaking", "Service using too much RAM", "Docker config help",
    "Logs eating disk space",
]
|
||||
|
||||
|
||||
def cosine(a, b):
    """Return the cosine similarity of two 1-D tensors as a Python float."""
    sim = nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0))
    return sim.item()
|
||||
|
||||
|
||||
def load_model():
    """Load the all-MiniLM-L6-v2 sentence encoder on DEVICE.

    The import is deliberately local so the module can be imported
    without sentence-transformers installed.
    """
    from sentence_transformers import SentenceTransformer
    return SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
|
||||
|
||||
|
||||
def build_memory(model, n_bg):
    """Encode the test pairs plus ``n_bg`` synthetic background memories.

    Returns a 6-tuple:
    (cue_mat, target_mat, mids, cue_embs, target_embs, para_embs) where
    ``mids`` gives each row's memory id — 0..9 for the test pairs,
    100+i for background item i.
    """
    def _enc(texts, **kw):
        # All embeddings are unit-normalized on DEVICE.
        return model.encode(texts, convert_to_tensor=True,
                            normalize_embeddings=True, device=DEVICE, **kw)

    cue_embs = _enc([cue for cue, _ in PAIRS])
    target_embs = _enc([tgt for _, tgt in PAIRS])
    para_embs = _enc(PARAPHRASES)

    rows_cues = list(cue_embs)
    rows_targets = list(target_embs)
    mids = list(range(len(PAIRS)))

    if n_bg > 0:
        topics = ["server", "db", "api", "fe", "be", "cache",
                  "queue", "net", "store", "auth", "docker", "k8s"]
        bg_cue_texts = [f"The {topics[i%len(topics)]} has issue {i}" for i in range(n_bg)]
        bg_target_texts = [f"Fix {topics[i%len(topics)]} issue {i}" for i in range(n_bg)]
        bg_c = _enc(bg_cue_texts, batch_size=256)
        bg_t = _enc(bg_target_texts, batch_size=256)
        rows_cues.extend(bg_c)
        rows_targets.extend(bg_t)
        # Background ids start at 100 so they never collide with pair ids 0..9.
        mids.extend(100 + i for i in range(n_bg))

    return (torch.stack(rows_cues), torch.stack(rows_targets), mids,
            cue_embs, target_embs, para_embs)
|
||||
|
||||
|
||||
def test_topk_coverage(model, n_bg_list):
    """Oracle analysis: for each memory size, how often does the correct
    cue appear in the NN top-K, and what K would be needed?
    """
    print("=== Test 1: Top-K Coverage Analysis ===\n")

    n = len(PARAPHRASES)
    for n_bg in n_bg_list:
        cue_mat, _tm, mids, _ce, _te, para_embs = build_memory(model, n_bg)

        for K in [5, 10, 20, 50, 100, 200]:
            in_topk = 0
            for i in range(n):
                sims = para_embs[i] @ cue_mat.T
                _, top_idx = sims.topk(min(K, len(mids)))
                # Correct iff memory id i survives the pre-filter.
                if i in (mids[j] for j in top_idx.tolist()):
                    in_topk += 1

            print(f" N={n_bg+len(PAIRS):>6}, K={K:>3}: "
                  f"{in_topk}/{n} ({in_topk/n:.0%}) correct cue in top-K")
        print()
|
||||
|
||||
|
||||
def test_two_stage_topk(model, n_bg):
    """Vary the NN pre-filter size K in two-stage Hopfield retrieval.

    Stage 1: cosine top-K over all stored cues.
    Stage 2: three rounds of softmax Hopfield settling (beta=16) over the
    candidate cues, then the attention mass is summed per memory id and
    the id with the largest mass is the retrieval result.

    Fixes vs. original: drops the unused ``cand_targets`` slice and the
    unused embedding tensors from the unpack — targets play no role in
    this cue-identification experiment.
    """
    print(f"\n=== Test 2: Two-Stage K Optimization (bg={n_bg}) ===\n")

    cue_mat, _target_mat, mids, _cue_embs, _target_embs, para_embs = build_memory(model, n_bg)

    n = len(PARAPHRASES)
    for K in [5, 10, 20, 50, 100, 200]:
        correct = 0
        for i in range(n):
            # Stage 1: nearest-neighbour pre-filter.
            sims = para_embs[i] @ cue_mat.T
            k = min(K, len(mids))
            _, top_idx = sims.topk(k)
            cand_cues = cue_mat[top_idx]
            cand_mids = [mids[j] for j in top_idx.tolist()]

            # Stage 2: Hopfield settle — pull the query toward its attractor.
            xi = para_embs[i]
            for _ in range(3):
                attn = torch.softmax(16.0 * (xi @ cand_cues.T), dim=0)
                xi = nn.functional.normalize(attn @ cand_cues, dim=0)

            attn = torch.softmax(16.0 * (xi @ cand_cues.T), dim=0)

            # Vote: aggregate attention per memory id.
            mid_scores = {}
            for j, mid in enumerate(cand_mids):
                mid_scores[mid] = mid_scores.get(mid, 0) + attn[j].item()

            if max(mid_scores, key=mid_scores.get) == i:
                correct += 1

        print(f" K={K:>3}: {correct}/{n} ({correct/n:.0%})")
|
||||
|
||||
|
||||
def test_hierarchical(model, n_bg):
    """Cluster cues with spherical k-means, route each query to its top-3
    clusters, then run two-stage Hopfield retrieval on their members.

    A query whose routed clusters yield no candidates is counted as
    incorrect (the ``continue`` skips the correct-counter).

    Fixes vs. original: removes the unused ``from torch import cdist``
    import and the unused ``cand_targets`` slice.
    """
    print(f"\n=== Test 3: Hierarchical Memory (bg={n_bg}) ===\n")

    cue_mat, _target_mat, mids, _cue_embs, _target_embs, para_embs = build_memory(model, n_bg)

    # Roughly one cluster per 100 memories, at least 10.
    n_clusters = max(10, (n_bg + len(PAIRS)) // 100)

    # Simple k-means on the unit-norm cue embeddings (cosine distance),
    # 20 fixed iterations, centroids seeded by random rows.
    N = cue_mat.shape[0]
    centroids = cue_mat[torch.randperm(N)[:n_clusters]].clone()
    for _ in range(20):
        dists = 1 - cue_mat @ centroids.T  # cosine distance
        assignments = dists.argmin(dim=1)
        for c in range(n_clusters):
            mask = assignments == c
            if mask.sum() > 0:  # empty clusters keep their previous centroid
                centroids[c] = nn.functional.normalize(cue_mat[mask].mean(dim=0), dim=0)

    correct = 0
    for i in range(len(PARAPHRASES)):
        # Route the query to the 3 most similar clusters.
        cluster_sims = para_embs[i] @ centroids.T
        top_clusters = cluster_sims.topk(3).indices

        # Gather (deduplicated) member indices of the routed clusters.
        cand_idx = []
        for c in top_clusters:
            cand_idx.extend((assignments == c).nonzero().squeeze(-1).tolist())
        cand_idx = list(set(cand_idx))

        if not cand_idx:
            continue  # no candidates -> scored as a miss

        cand_cues = cue_mat[cand_idx]
        cand_mids = [mids[j] for j in cand_idx]

        # NN pre-filter within the routed clusters.
        K = min(20, len(cand_idx))
        sims = para_embs[i] @ cand_cues.T
        _, top_local = sims.topk(K)
        local_cues = cand_cues[top_local]
        local_mids = [cand_mids[j] for j in top_local.tolist()]

        # Hopfield settle (beta=16, 3 iterations) on the local candidates.
        xi = para_embs[i]
        for _ in range(3):
            attn = torch.softmax(16.0 * (xi @ local_cues.T), dim=0)
            xi = nn.functional.normalize(attn @ local_cues, dim=0)

        attn = torch.softmax(16.0 * (xi @ local_cues.T), dim=0)

        # Vote: aggregate attention per memory id.
        mid_scores = {}
        for j, mid in enumerate(local_mids):
            mid_scores[mid] = mid_scores.get(mid, 0) + attn[j].item()

        if max(mid_scores, key=mid_scores.get) == i:
            correct += 1

    n = len(PARAPHRASES)
    print(f" Hierarchical (clusters={n_clusters}): {correct}/{n} ({correct/n:.0%})")
|
||||
|
||||
|
||||
def main():
    """Run all three scale experiments with a single shared encoder."""
    banner = "=" * 60
    print(banner)
    print("Experiment P3: Breaking the 20K Ceiling")
    print(banner)

    model = load_model()

    # Test 1: oracle top-K coverage across memory sizes.
    test_topk_coverage(model, [0, 500, 2000, 5000, 10000, 20000])

    # Test 2: optimal pre-filter K at three background sizes.
    for bg in (2000, 10000, 20000):
        test_two_stage_topk(model, bg)

    # Test 3: hierarchical routing at the same sizes.
    for bg in (2000, 10000, 20000):
        test_hierarchical(model, bg)
|
||||
|
||||
|
||||
# Script entry point: run all three experiments.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user