# Hopfield + Hebbian hybrid memory system for LLMs.
# Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025).
#
# Architecture:
# - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle)
# - Multi-hop: Hebbian W matrix with WTA pattern separation
# - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency
# - 4ms latency @ 20K memories, ~1GB VRAM
#
# Key findings:
# - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian)
# - WTA pattern separation enables 20K+ capacity
# - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this
# - MiniLM-L6 is optimal (discrimination gap > absolute similarity)
# - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark
# - SNN encoder viable (CosSim 0.99) but not needed for current architecture
"""Experiment 4c: Find optimal config for real-world use.

From exp04b: k=50 gives 95% paraphrase recall (best).
Need to verify capacity is still sufficient at k=50.
Also: test with more realistic memory counts (100-1000).
"""

import sys
|
|
import time
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import numpy as np
|
|
|
|
DEVICE = "cuda"
|
|
RESULTS_DIR = Path(__file__).parent.parent / "doc"
|
|
|
|
|
|
def cosine(a, b):
|
|
if a.norm() == 0 or b.norm() == 0:
|
|
return 0.0
|
|
return nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0)).item()
|
|
|
|
|
|
def winner_take_all(x, k):
|
|
_, idx = x.topk(k, dim=-1)
|
|
out = torch.zeros_like(x)
|
|
out.scatter_(-1, idx, 1.0)
|
|
return out
|
|
|
|
|
|
class UnifiedHebbianMemory:
|
|
def __init__(self, input_dim, code_dim, k):
|
|
self.k = k
|
|
self.code_dim = code_dim
|
|
self.proj = (torch.randn(input_dim, code_dim, device=DEVICE)
|
|
* (1.0 / input_dim**0.5))
|
|
self.W = torch.zeros(code_dim, code_dim, device=DEVICE)
|
|
|
|
def sep(self, x):
|
|
return winner_take_all(x @ self.proj, self.k)
|
|
|
|
def learn(self, cue_emb, target_emb):
|
|
self.W += torch.outer(self.sep(target_emb), self.sep(cue_emb))
|
|
|
|
def recall(self, query_emb):
|
|
code = self.sep(query_emb)
|
|
raw = self.W @ code
|
|
return winner_take_all(raw, self.k)
|
|
|
|
|
|
def test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000):
|
|
"""Generate lots of diverse sentence pairs and test recall."""
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# Generate diverse sentences programmatically
|
|
topics = [
|
|
"deploy", "database", "API", "testing", "monitoring", "security",
|
|
"frontend", "backend", "caching", "logging", "backup", "server",
|
|
"CI/CD", "Docker", "Kubernetes", "microservice", "authentication",
|
|
"performance", "debugging", "refactoring"
|
|
]
|
|
actions = [
|
|
"is broken", "needs updating", "has a bug", "was configured wrong",
|
|
"needs optimization", "requires migration", "should be refactored",
|
|
"has a memory leak", "is timing out", "needs documentation"
|
|
]
|
|
facts = [
|
|
"was fixed last week by adding an index",
|
|
"uses the new v3 API endpoint",
|
|
"is scheduled for maintenance on Friday",
|
|
"requires admin access to modify",
|
|
"has a known issue with large payloads",
|
|
"was migrated from AWS to GCP",
|
|
"needs Python 3.12 or higher",
|
|
"uses Redis for session storage",
|
|
"has rate limiting at 1000 req/min",
|
|
"is monitored by PagerDuty"
|
|
]
|
|
|
|
cue_sentences = []
|
|
target_sentences = []
|
|
for i in range(max_memories):
|
|
topic = topics[i % len(topics)]
|
|
action = actions[i % len(actions)]
|
|
fact = facts[i % len(facts)]
|
|
idx = i // (len(topics) * len(actions))
|
|
|
|
cue_sentences.append(f"The {topic} system {action} (issue #{i})")
|
|
target_sentences.append(f"{topic} {fact}, ticket #{i}, priority {idx}")
|
|
|
|
embed_dim = model.get_sentence_embedding_dimension()
|
|
mem = UnifiedHebbianMemory(embed_dim, code_dim, k)
|
|
|
|
# Encode in batches
|
|
batch_size = 256
|
|
checkpoints = [50, 100, 200, 500, 1000, 2000]
|
|
all_cue_embs = []
|
|
all_target_embs = []
|
|
|
|
print(f" Config: code_dim={code_dim}, k={k}")
|
|
|
|
for start in range(0, max_memories, batch_size):
|
|
end = min(start + batch_size, max_memories)
|
|
cue_embs = model.encode(cue_sentences[start:end],
|
|
convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE)
|
|
target_embs = model.encode(target_sentences[start:end],
|
|
convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE)
|
|
|
|
for i in range(cue_embs.shape[0]):
|
|
mem.learn(cue_embs[i], target_embs[i])
|
|
all_cue_embs.append(cue_embs[i])
|
|
all_target_embs.append(target_embs[i])
|
|
|
|
total = len(all_cue_embs)
|
|
if total in checkpoints:
|
|
# Test on random sample
|
|
sample_n = min(100, total)
|
|
indices = torch.randperm(total)[:sample_n].tolist()
|
|
|
|
correct = 0
|
|
for idx in indices:
|
|
recalled = mem.recall(all_cue_embs[idx])
|
|
target_code = mem.sep(all_target_embs[idx])
|
|
if cosine(recalled, target_code) > 0.5:
|
|
correct += 1
|
|
|
|
w_norm = mem.W.norm().item()
|
|
print(f" N={total:>5}: Recall={correct}/{sample_n} "
|
|
f"({correct/sample_n:.0%}), W_norm={w_norm:.0f}")
|
|
|
|
|
|
def test_paraphrase_at_scale(model, code_dim, k, n_memories):
|
|
"""Add many memories, then test paraphrase recall on a subset."""
|
|
embed_dim = model.get_sentence_embedding_dimension()
|
|
mem = UnifiedHebbianMemory(embed_dim, code_dim, k)
|
|
|
|
# Add background memories (noise)
|
|
bg_cues = [f"Background task number {i} about topic {i%20}" for i in range(n_memories)]
|
|
bg_targets = [f"Background fact {i} with detail {i%10}" for i in range(n_memories)]
|
|
|
|
bg_cue_embs = model.encode(bg_cues, convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE,
|
|
batch_size=256)
|
|
bg_target_embs = model.encode(bg_targets, convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE,
|
|
batch_size=256)
|
|
|
|
for i in range(n_memories):
|
|
mem.learn(bg_cue_embs[i], bg_target_embs[i])
|
|
|
|
# Now add our specific test memories
|
|
test_pairs = [
|
|
("What's the weather like today?", "User prefers to check weather every morning"),
|
|
("Let's deploy the new version", "The deployment pipeline uses GitHub Actions with k3s"),
|
|
("The database is slow again", "Missing index on users table caused slowdown last time"),
|
|
("I need to fix the auth bug", "Auth service uses JWT tokens with 24h expiry in Redis"),
|
|
("The API returns 500 errors", "Last 500 was caused by OOM in the Python worker"),
|
|
]
|
|
paraphrases = [
|
|
"How's the weather outside?",
|
|
"We should push the new release",
|
|
"DB performance is terrible",
|
|
"There's a login bug to fix",
|
|
"Getting internal server errors",
|
|
]
|
|
|
|
test_cue_embs = model.encode([p[0] for p in test_pairs],
|
|
convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE)
|
|
test_target_embs = model.encode([p[1] for p in test_pairs],
|
|
convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE)
|
|
para_embs = model.encode(paraphrases, convert_to_tensor=True,
|
|
normalize_embeddings=True, device=DEVICE)
|
|
|
|
for i in range(len(test_pairs)):
|
|
mem.learn(test_cue_embs[i], test_target_embs[i])
|
|
|
|
# Test exact recall
|
|
exact_correct = 0
|
|
for i in range(len(test_pairs)):
|
|
recalled = mem.recall(test_cue_embs[i])
|
|
tc = mem.sep(test_target_embs[i])
|
|
if cosine(recalled, tc) > 0.5:
|
|
exact_correct += 1
|
|
|
|
# Test paraphrase recall
|
|
para_correct = 0
|
|
for i in range(len(paraphrases)):
|
|
recalled = mem.recall(para_embs[i])
|
|
tc = mem.sep(test_target_embs[i])
|
|
if cosine(recalled, tc) > 0.5:
|
|
para_correct += 1
|
|
|
|
n = len(test_pairs)
|
|
print(f" bg={n_memories}, code={code_dim}, k={k}: "
|
|
f"Exact={exact_correct}/{n}, Para={para_correct}/{n}")
|
|
return exact_correct / n, para_correct / n
|
|
|
|
|
|
def main():
|
|
print("=" * 60)
|
|
print("Experiment 4c: Optimal Config + Scale Testing")
|
|
print("=" * 60)
|
|
|
|
from sentence_transformers import SentenceTransformer
|
|
model = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
|
|
|
|
# Test 1: Capacity with real embeddings
|
|
print("\n=== Capacity Test ===")
|
|
for code_dim, k in [(8192, 50), (16384, 50), (16384, 20), (32768, 50)]:
|
|
test_capacity_with_real_embeddings(model, code_dim, k, max_memories=2000)
|
|
print()
|
|
|
|
# Test 2: Paraphrase at scale
|
|
print("\n=== Paraphrase Recall at Scale ===")
|
|
for n_bg in [0, 100, 500, 1000]:
|
|
for code_dim, k in [(8192, 50), (16384, 50)]:
|
|
test_paraphrase_at_scale(model, code_dim, k, n_bg)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|