Files
nuonuo/experiments/exp07e_cue_augmentation.py
Fam Zheng d923aa1e31 NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs.
Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025).

Architecture:
- Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle)
- Multi-hop: Hebbian W matrix with WTA pattern separation
- 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency
- 4ms latency @ 20K memories, ~1GB VRAM

Key findings:
- Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian)
- WTA pattern separation enables 20K+ capacity
- Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this
- MiniLM-L6 is optimal (discrimination gap > absolute similarity)
- Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark
- SNN encoder viable (CosSim 0.99) but not needed for current architecture
2026-04-07 10:37:24 +01:00

261 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Experiment 7e: Cue augmentation to overcome embedding model limitations.
Idea: When storing a memory, also store augmented versions of the cue.
If the user says "The database is slow", also store:
- The embedding with added noise (gaussian augmentation)
- A shifted version toward common paraphrase patterns
This increases the "catchment basin" of each memory without changing the model.
Also test: using the LLM itself to generate paraphrases (simulated here).
"""
import sys
import time
from pathlib import Path
import torch
import torch.nn as nn
import numpy as np
DEVICE = "cuda"
def cosine(a, b):
    """Return the cosine similarity of two 1-D tensors as a Python float."""
    sim = nn.functional.cosine_similarity(a[None, :], b[None, :])
    return sim.item()
class AugmentedHopfield:
    """Hopfield memory with cue augmentation.

    Each memory stores N augmented cue embeddings, all pointing to the same
    target, which widens the "catchment basin" of a memory without changing
    the embedding model. During recall, any of the augmented cues can match.
    """

    def __init__(self, beta=16.0, top_k=20, n_augments=5, noise_std=0.15):
        # beta: inverse temperature of the Hopfield softmax (higher = sharper).
        # top_k: candidate pool size for the stage-1 nearest-neighbour prune.
        # n_augments: number of noisy cue copies stored per cue.
        # noise_std: std-dev of the gaussian noise added to each copy.
        self.beta = beta
        self.top_k = top_k
        self.n_augments = n_augments
        self.noise_std = noise_std
        self.cue_embs = []
        self.target_embs = []
        self.memory_ids = []  # Which original memory each entry belongs to

    def learn(self, cue_emb, target_emb, memory_id=None):
        """Store one (cue, target) pair plus n_augments noisy cue copies."""
        mid = memory_id if memory_id is not None else len(set(self.memory_ids))
        # Detach once up front: the original code only detached the plain
        # entry, so noisy copies kept an autograd graph alive whenever the
        # input required grad.
        cue = cue_emb.detach()
        target = target_emb.detach()
        # Original entry.
        self.cue_embs.append(cue)
        self.target_embs.append(target)
        self.memory_ids.append(mid)
        # Augmented entries: add gaussian noise, renormalize to the unit sphere.
        for _ in range(self.n_augments):
            noisy = cue + torch.randn_like(cue) * self.noise_std
            noisy = nn.functional.normalize(noisy, dim=0)
            self.cue_embs.append(noisy)
            self.target_embs.append(target)
            self.memory_ids.append(mid)

    def learn_with_paraphrases(self, cue_embs_list, target_emb, memory_id=None):
        """Store multiple cue embeddings for the same target.

        cue_embs_list: list of embeddings (original + paraphrases).
        Each cue is routed through ``learn`` so the configured noise
        augmentation also applies (the original stored paraphrases without
        noise even when n_augments > 0, so "noise + paraphrase" silently
        degenerated to paraphrase-only). With n_augments == 0 this is
        identical to the original behavior.
        """
        mid = memory_id if memory_id is not None else len(set(self.memory_ids))
        for ce in cue_embs_list:
            self.learn(ce, target_emb, memory_id=mid)

    def recall(self, query_emb, steps=3):
        """Two-stage recall: top-k nearest-neighbour prune, then Hopfield settle.

        Returns the normalized attention-weighted target embedding.
        """
        cue_mat = torch.stack(self.cue_embs)
        target_mat = torch.stack(self.target_embs)
        n_entries = cue_mat.shape[0]
        # Stage 1: restrict attention to the k nearest cues.
        k = min(self.top_k, n_entries)
        sims = query_emb @ cue_mat.T
        _, top_idx = sims.topk(k)
        cand_cues = cue_mat[top_idx]
        cand_targets = target_mat[top_idx]
        # Stage 2: iterated softmax attention over the candidate cues
        # (modern-Hopfield settling), then read the target out with the
        # settled attention pattern.
        xi = query_emb
        for _ in range(steps):
            attn = torch.softmax(self.beta * (xi @ cand_cues.T), dim=0)
            xi = nn.functional.normalize(attn @ cand_cues, dim=0)
        attn = torch.softmax(self.beta * (xi @ cand_cues.T), dim=0)
        return nn.functional.normalize(attn @ cand_targets, dim=0)
def load_model():
    """Load the MiniLM-L6 sentence encoder onto DEVICE."""
    from sentence_transformers import SentenceTransformer
    encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
    return encoder
def test_augmentation(model):
    """Compare: no augmentation vs noise augmentation vs paraphrase augmentation.

    model: a SentenceTransformer used for all embedding. Stores 20 real
    (cue, target) pairs plus 2000 synthetic background distractors, then
    measures paraphrase-recall accuracy under five augmentation strategies.
    """
    print("\n=== Augmentation Comparison (20 pairs, 2000 bg) ===")
    pairs = [
        ("What's the weather like today?", "User checks weather every morning"),
        ("Let's deploy the new version", "Deployment uses GitHub Actions with k3s"),
        ("The database is slow again", "Missing index on users table"),
        ("I need to fix the authentication bug", "JWT tokens with 24h expiry in Redis"),
        ("The API returns 500 errors", "OOM in the Python worker"),
        ("Let's set up monitoring", "Prometheus + Grafana on OCI"),
        ("Tests failing in CI", "CI needs postgres service container"),
        ("Memory usage too high", "Leak in websocket handler"),
        ("Help with Docker setup", "docker-compose for dev, k3s for prod"),
        ("Log files too large", "Logs rotate daily, shipped to Loki"),
        ("How to add caching?", "Redis available at redis.internal:6379"),
        ("Frontend loads slowly", "CDN CloudFlare, 1h TTL for assets"),
        ("Refactor payment module", "Stripe API, webhook in payments/webhook.py"),
        ("Set up new server", "Ubuntu 22.04, Docker, Tailscale, monitoring"),
        ("Optimize search", "Elasticsearch v8, recently upgraded"),
        ("Backup the database", "Daily 3am UTC cron to S3"),
        ("Configure reverse proxy", "Traefik, not nginx"),
        ("Team meeting schedule", "Standup 10am London, Mon-Fri"),
        ("Learn a new programming language", "User has Python+Go, new to systems"),
        ("Review my pull request", "User prefers small PRs with clear commits"),
    ]
    # One held-out paraphrase per pair, used as the recall query.
    paraphrases = [
        "How's the weather?", "Ship the release", "DB performance terrible",
        "Fix the login issue", "Server errors everywhere", "Need observability",
        "CI tests breaking", "Service using too much RAM", "Docker config help",
        "Logs eating disk space", "Want to add a cache layer", "Website too slow",
        "Payment code needs rework", "Provision a new machine", "Search is slow",
        "Need a DB backup", "Proxy configuration", "When's the standup?",
        "Want to learn Rust", "Check my pull request",
    ]
    # Hand-crafted additional paraphrases for hard cases (keyed by pair index).
    extra_paraphrases = {
        1: ["Ship the release", "Push to production", "Release the new build"],
        3: ["Fix the login issue", "Authentication is broken", "Login doesn't work"],
        4: ["Server errors everywhere", "Getting 500s", "Internal server error"],
        5: ["Need observability", "Set up alerts", "Monitor the services"],
        10: ["Add a cache layer", "Implement caching", "Cache the responses"],
        11: ["Website too slow", "Page load time is bad", "Frontend performance"],
        13: ["Provision a new machine", "Need a new server", "Set up a new box"],
        17: ["When's the standup?", "What time is the meeting?", "Daily sync time?"],
        18: ["Want to learn Rust", "Getting into Rust", "Start learning Rust"],
        19: ["Check my pull request", "Look at my code changes", "PR review please"],
    }

    def encode(texts, batch_size=32):
        # Single place for the shared encode options (the original repeated
        # them at every call site).
        return model.encode(texts, convert_to_tensor=True,
                            normalize_embeddings=True, device=DEVICE,
                            batch_size=batch_size)

    cue_embs = encode([p[0] for p in pairs])
    target_embs = encode([p[1] for p in pairs])
    para_embs = encode(paraphrases)
    extra_embs = {idx: encode(texts) for idx, texts in extra_paraphrases.items()}

    # Background distractors: 2000 synthetic devops-flavored memories.
    topics = ["server", "database", "API", "frontend", "backend", "cache",
              "queue", "network", "storage", "auth", "docker", "kubernetes",
              "redis", "nginx", "postgres", "python", "golang", "react",
              "terraform", "ansible"]
    actions = ["crashed", "is slow", "needs update", "has bug", "timed out",
               "needs migration", "needs backup", "has leak", "is down", "needs config"]
    bg_cues = [f"The {topics[i%len(topics)]} {actions[i%len(actions)]} (ticket {i})"
               for i in range(2000)]
    bg_targets = [f"Fix {topics[i%len(topics)]} {actions[i%len(actions)]}: wiki {i}"
                  for i in range(2000)]
    bg_cue_embs = encode(bg_cues, batch_size=256)
    bg_target_embs = encode(bg_targets, batch_size=256)

    def add_background(mem):
        # Append the distractors directly so they are never augmented;
        # background memory ids start at 100 to stay clear of pair ids.
        for i in range(2000):
            mem.cue_embs.append(bg_cue_embs[i])
            mem.target_embs.append(bg_target_embs[i])
            mem.memory_ids.append(100 + i)

    def cue_variants(i):
        # Original cue plus any hand-crafted paraphrase embeddings for pair i.
        variants = [cue_embs[i]]
        if i in extra_embs:
            variants.extend(extra_embs[i])
        return variants

    def evaluate(mem, label):
        # Accuracy = fraction of paraphrase queries whose recalled vector is
        # closest to the correct pair's target among all 20 targets.
        correct = 0
        for i in range(len(paraphrases)):
            with torch.no_grad():
                recalled = mem.recall(para_embs[i])
            all_sims = [cosine(recalled, target_embs[j]) for j in range(len(pairs))]
            if np.argmax(all_sims) == i:
                correct += 1
        n = len(paraphrases)
        print(f" {label}: {correct}/{n} ({correct/n:.0%})")
        return correct / n

    # Method 1: No augmentation (baseline)
    mem1 = AugmentedHopfield(n_augments=0)
    for i in range(len(pairs)):
        mem1.learn(cue_embs[i], target_embs[i], memory_id=i)
    add_background(mem1)
    evaluate(mem1, "No augmentation")

    # Method 2: Noise augmentation (5 copies), sweeping the noise scale
    for noise in [0.1, 0.15, 0.2, 0.3]:
        mem2 = AugmentedHopfield(n_augments=5, noise_std=noise)
        for i in range(len(pairs)):
            mem2.learn(cue_embs[i], target_embs[i], memory_id=i)
        add_background(mem2)
        evaluate(mem2, f"Noise aug (σ={noise}, n=5)")

    # Method 3: Noise augmentation (20 copies)
    mem3 = AugmentedHopfield(n_augments=20, noise_std=0.15)
    for i in range(len(pairs)):
        mem3.learn(cue_embs[i], target_embs[i], memory_id=i)
    add_background(mem3)
    evaluate(mem3, "Noise aug (σ=0.15, n=20)")

    # Method 4: Paraphrase augmentation (hand-crafted extras)
    mem4 = AugmentedHopfield(n_augments=0)
    for i in range(len(pairs)):
        mem4.learn_with_paraphrases(cue_variants(i), target_embs[i], memory_id=i)
    add_background(mem4)
    evaluate(mem4, "Paraphrase aug (hand-crafted)")

    # Method 5: Noise + Paraphrase combined
    mem5 = AugmentedHopfield(n_augments=5, noise_std=0.15)
    for i in range(len(pairs)):
        mem5.learn_with_paraphrases(cue_variants(i), target_embs[i], memory_id=i)
    add_background(mem5)
    evaluate(mem5, "Noise + Paraphrase combined")
def main():
    """Entry point: load the encoder and run the augmentation comparison."""
    banner = "=" * 60
    print(banner)
    print("Experiment 7e: Cue Augmentation")
    print(banner)
    test_augmentation(load_model())
if __name__ == "__main__":
main()