Hopfield + Hebbian hybrid memory system for LLMs. Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025). Architecture: - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle) - Multi-hop: Hebbian W matrix with WTA pattern separation - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency - 4ms latency @ 20K memories, ~1GB VRAM Key findings: - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian) - WTA pattern separation enables 20K+ capacity - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this - MiniLM-L6 is optimal (discrimination gap > absolute similarity) - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark - SNN encoder viable (CosSim 0.99) but not needed for current architecture
214 lines · 7.5 KiB · Python
"""Experiment P0: LLM Integration — end-to-end memory-augmented conversation.

Tests:

1. Memory extraction (heuristic fallback since LLM gateway is down)
2. Paraphrase generation (heuristic fallback)
3. End-to-end: conversation → extract → store → recall → inject
4. Multi-turn conversation simulation
"""
import sys
import time
from pathlib import Path

import torch

# Make the project's src/ directory and root importable before the local
# imports below; src/ ends up first on sys.path after both inserts.
_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(_ROOT / "src"))
sys.path.insert(0, str(_ROOT))

from nuonuo.hippocampus import HippocampalMemory
from llm import (
    LLMClient,
    extract_memories_heuristic,
    extract_memories_llm,
    generate_paraphrases_heuristic,
    generate_paraphrases_llm,
    format_recalled_memories,
)

# Device used for all sentence embeddings.
DEVICE = "cuda"
def load_model():
    """Load the MiniLM-L6 sentence encoder on the configured device.

    Imported lazily so the module can be inspected without the
    sentence-transformers dependency installed.
    """
    from sentence_transformers import SentenceTransformer

    encoder = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)
    return encoder
def emb(model, text):
    """Embed a single *text* and return its L2-normalized embedding tensor."""
    vectors = model.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    return vectors[0]
def test_heuristic_extraction():
    """Exercise the no-LLM memory-extraction heuristic on sample dialogues."""
    print("=== Test 1: Heuristic Memory Extraction ===\n")

    samples = [
        ("How do I deploy to production?",
         "Use the blue-green deployment pipeline via GitHub Actions. The config is in .github/workflows/deploy.yml"),
        ("The database is really slow today",
         "Check for missing indexes on the users table. Last time this happened it was the created_at column."),
        ("Hi, how are you?",
         "I'm doing well, thanks!"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug please",
         "The auth service uses JWT tokens with 24h expiry stored in Redis. The bug was in token refresh logic."),
    ]

    for question, answer in samples:
        extracted = extract_memories_heuristic(question, answer)
        print(f" User: {question[:50]}...")
        # Small talk (e.g. greetings) is expected to yield no memories.
        if not extracted:
            print(" → (nothing extracted)")
        else:
            for item in extracted:
                print(f" → CUE: {item.cue[:40]}... | TARGET: {item.target[:50]}... | IMP: {item.importance}")
        print()
def test_heuristic_paraphrases():
    """Exercise the no-LLM paraphrase generator on sample cue texts."""
    print("=== Test 2: Heuristic Paraphrase Generation ===\n")

    cue_texts = [
        "How do I deploy to production?",
        "The database is slow",
        "Can you fix the authentication bug?",
        "I need to configure nginx",
        "Let's set up monitoring for the server",
    ]

    for original in cue_texts:
        variants = generate_paraphrases_heuristic(original, n=3)
        print(f" Original: {original}")
        for variant in variants:
            print(f" → {variant}")
        print()
def test_end_to_end(model):
    """Full pipeline: conversation → extract → store → recall → inject."""
    print("=== Test 3: End-to-End Pipeline ===\n")

    # embed_dim=384 — presumably matches the MiniLM-L6 output width; confirm
    # against the encoder loaded in load_model().
    memory = HippocampalMemory(embed_dim=384)
    llm = LLMClient()  # Will fail gracefully if gateway down

    # Simulate a few conversation turns
    turns = [
        ("How do I deploy to production?",
         "Use blue-green deployment via GitHub Actions. Config in .github/workflows/deploy.yml"),
        ("The database is really slow",
         "Check for missing indexes on users table, especially created_at column"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug",
         "Auth uses JWT tokens with 24h expiry in Redis. Bug was in token refresh."),
        ("How do I backup the database?",
         "Backups run daily at 3am UTC via cron job to S3. Config in /etc/cron.d/db-backup"),
    ]

    # Phase 1: Learn from conversations
    print("--- Phase 1: Learning from conversations ---")
    for user_msg, assistant_msg in turns:
        # Extract memories — LLM path when the gateway is up, heuristic otherwise.
        if llm.available:
            memories = extract_memories_llm(llm, user_msg, assistant_msg)
        else:
            memories = extract_memories_heuristic(user_msg, assistant_msg)

        for mem_item in memories:
            # Generate paraphrases of the cue (same LLM/heuristic split).
            if llm.available:
                paras = generate_paraphrases_llm(llm, mem_item.cue, n=3)
            else:
                paras = generate_paraphrases_heuristic(mem_item.cue, n=3)

            # Embed and store; paraphrase embeddings are passed as cue
            # variants (None when no paraphrases were produced).
            cue_emb = emb(model, mem_item.cue)
            target_emb = emb(model, mem_item.target)
            para_embs = [emb(model, p) for p in paras] if paras else None

            mid = memory.store(
                cue_emb, target_emb,
                cue_variants=para_embs,
                metadata={"cue": mem_item.cue, "target": mem_item.target,
                          "importance": mem_item.importance},
            )
            print(f" Stored [{mid}]: {mem_item.cue[:40]}... → {mem_item.target[:40]}...")
            if paras:
                print(f" + {len(paras)} paraphrases: {[p[:30] for p in paras]}")

    print(f"\n Total: {memory.stats()}")

    # Phase 2: Recall — queries are deliberate paraphrases of stored cues,
    # never verbatim matches.
    print("\n--- Phase 2: Recall from new queries ---")
    queries = [
        "DB performance is terrible",
        "How to push a new release?",
        "What's the Redis connection info?",
        "The login system has a problem",
        "Need to create a database backup",
        "Where's the deployment config?",
    ]

    for query in queries:
        query_emb = emb(model, query)

        # Single-hop recall
        results = memory.recall(query_emb, top_k=2)

        # Multi-hop
        chain = memory.recall_chain(query_emb, hops=2)

        # Format for context injection — chain hits are appended after the
        # direct results, deduplicated by memory_id.
        all_results = results + [r for r in chain if r.memory_id not in {r2.memory_id for r2 in results}]
        context = format_recalled_memories(all_results)

        print(f"\n Query: \"{query}\"")
        if results:
            print(f" Top result: {results[0].metadata.get('target', '?')[:60]}...")
            print(f" Similarity: {results[0].similarity:.3f}")
        if chain and len(chain) > 1:
            print(f" Chain hop 2: {chain[1].metadata.get('target', '?')[:60]}...")
        if context:
            # chr(10) is "\n": indent each context line under the label.
            print(f" Context injection:\n {context.replace(chr(10), chr(10) + ' ')}")
def test_llm_live(model):
    """Run extraction and paraphrase generation against the live LLM gateway.

    NOTE: *model* is accepted for signature parity with the other tests but
    is not used here. Skips (with instructions) when the gateway is down.
    """
    print("\n=== Test 4: Live LLM Integration ===\n")

    client = LLMClient()
    if not client.available:
        print(" LLM Gateway not available. Skipping live test.")
        print(" To test: ensure https://ste-jarvis.tiktok-row.net/llm/v1 is reachable")
        return

    # Extraction on a realistic ops exchange.
    user_msg = "The payment webhook keeps failing with a 502 error"
    assistant_msg = "The webhook endpoint at /api/payments/webhook is behind nginx. Check if the upstream timeout is too short — payment processing can take up to 30 seconds."

    extracted = extract_memories_llm(client, user_msg, assistant_msg)
    print(f" Extracted {len(extracted)} memories from live LLM:")
    for item in extracted:
        print(f" CUE: {item.cue} | TARGET: {item.target[:60]}... | IMP: {item.importance}")

    # Paraphrase the first extracted cue, if any.
    if extracted:
        variants = generate_paraphrases_llm(client, extracted[0].cue, n=3)
        print(f"\n Paraphrases for '{extracted[0].cue}':")
        for variant in variants:
            print(f" → {variant}")
def main():
    """Run all four experiment-P0 tests in sequence."""
    banner = "=" * 60
    print(banner)
    print("Experiment P0: LLM Integration")
    print(banner)

    encoder = load_model()
    test_heuristic_extraction()
    test_heuristic_paraphrases()
    test_end_to_end(encoder)
    test_llm_live(encoder)


if __name__ == "__main__":
    main()