NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs. Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025). Architecture: - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle) - Multi-hop: Hebbian W matrix with WTA pattern separation - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency - 4ms latency @ 20K memories, ~1GB VRAM Key findings: - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian) - WTA pattern separation enables 20K+ capacity - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this - MiniLM-L6 is optimal (discrimination gap > absolute similarity) - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark - SNN encoder viable (CosSim 0.99) but not needed for current architecture
This commit is contained in:
213
experiments/exp08_llm_integration.py
Normal file
213
experiments/exp08_llm_integration.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""Experiment P0: LLM Integration — end-to-end memory-augmented conversation.
|
||||
|
||||
Tests:
|
||||
1. Memory extraction (heuristic fallback since LLM gateway is down)
|
||||
2. Paraphrase generation (heuristic fallback)
|
||||
3. End-to-end: conversation → extract → store → recall → inject
|
||||
4. Multi-turn conversation simulation
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from nuonuo.hippocampus import HippocampalMemory
|
||||
from llm import (LLMClient, extract_memories_heuristic, extract_memories_llm,
|
||||
generate_paraphrases_heuristic, generate_paraphrases_llm,
|
||||
format_recalled_memories)
|
||||
|
||||
DEVICE = "cuda"
|
||||
|
||||
|
||||
def load_model(model_name: str = "all-MiniLM-L6-v2"):
    """Load the sentence-embedding model onto DEVICE.

    The import is deferred so the heavy sentence_transformers dependency
    is only paid when an encoder is actually needed.

    Args:
        model_name: Hugging Face model id. Defaults to MiniLM-L6, the
            encoder the experiments were validated with (see commit notes:
            "MiniLM-L6 is optimal").

    Returns:
        A SentenceTransformer instance placed on DEVICE.
    """
    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(model_name, device=DEVICE)
|
||||
|
||||
|
||||
def emb(model, text):
    """Embed a single string; returns its 1-D L2-normalized embedding tensor."""
    batch = model.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    # encode() returns a batch of one; unwrap to the single vector.
    return batch[0]
|
||||
|
||||
|
||||
def test_heuristic_extraction():
    """Test memory extraction without LLM.

    Runs the heuristic extractor over a mix of technical turns (which
    should yield memories) and small talk (which should yield none),
    printing whatever was extracted from each exchange.
    """
    print("=== Test 1: Heuristic Memory Extraction ===\n")

    conversations = [
        ("How do I deploy to production?",
         "Use the blue-green deployment pipeline via GitHub Actions. The config is in .github/workflows/deploy.yml"),
        ("The database is really slow today",
         "Check for missing indexes on the users table. Last time this happened it was the created_at column."),
        ("Hi, how are you?",
         "I'm doing well, thanks!"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug please",
         "The auth service uses JWT tokens with 24h expiry stored in Redis. The bug was in token refresh logic."),
    ]

    for user_msg, assistant_msg in conversations:
        memories = extract_memories_heuristic(user_msg, assistant_msg)
        print(f" User: {user_msg[:50]}...")
        if memories:
            for m in memories:
                print(f" → CUE: {m.cue[:40]}... | TARGET: {m.target[:50]}... | IMP: {m.importance}")
        else:
            # Was an f-string with no placeholders (ruff F541); plain literal.
            print(" → (nothing extracted)")
        print()
||||
|
||||
|
||||
def test_heuristic_paraphrases():
    """Test paraphrase generation without LLM."""
    print("=== Test 2: Heuristic Paraphrase Generation ===\n")

    samples = (
        "How do I deploy to production?",
        "The database is slow",
        "Can you fix the authentication bug?",
        "I need to configure nginx",
        "Let's set up monitoring for the server",
    )

    # Print each original followed by its three heuristic rewrites.
    for original in samples:
        variants = generate_paraphrases_heuristic(original, n=3)
        print(f" Original: {original}")
        for variant in variants:
            print(f" → {variant}")
        print()
||||
|
||||
|
||||
def test_end_to_end(model):
    """Full pipeline: conversation → extract → store → recall → inject.

    Phase 1 extracts memories from simulated conversation turns (via the
    live LLM when the gateway is up, heuristics otherwise), augments each
    cue with paraphrases, embeds everything, and stores it in the
    hippocampal memory. Phase 2 issues fresh queries, performs single-hop
    recall plus a 2-hop associative chain, and formats the merged results
    for context injection.

    Args:
        model: sentence-embedding model passed to ``emb``.
    """
    print("=== Test 3: End-to-End Pipeline ===\n")

    memory = HippocampalMemory(embed_dim=384)
    llm = LLMClient()  # Will fail gracefully if gateway down

    # Simulate a few conversation turns
    turns = [
        ("How do I deploy to production?",
         "Use blue-green deployment via GitHub Actions. Config in .github/workflows/deploy.yml"),
        ("The database is really slow",
         "Check for missing indexes on users table, especially created_at column"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug",
         "Auth uses JWT tokens with 24h expiry in Redis. Bug was in token refresh."),
        ("How do I backup the database?",
         "Backups run daily at 3am UTC via cron job to S3. Config in /etc/cron.d/db-backup"),
    ]

    # Phase 1: Learn from conversations
    print("--- Phase 1: Learning from conversations ---")
    for user_msg, assistant_msg in turns:
        # Extract memories (LLM path when the gateway is reachable).
        if llm.available:
            memories = extract_memories_llm(llm, user_msg, assistant_msg)
        else:
            memories = extract_memories_heuristic(user_msg, assistant_msg)

        for mem_item in memories:
            # Generate paraphrases to augment the cue (improves recall —
            # see commit notes on paraphrase cue augmentation).
            if llm.available:
                paras = generate_paraphrases_llm(llm, mem_item.cue, n=3)
            else:
                paras = generate_paraphrases_heuristic(mem_item.cue, n=3)

            # Embed and store
            cue_emb = emb(model, mem_item.cue)
            target_emb = emb(model, mem_item.target)
            para_embs = [emb(model, p) for p in paras] if paras else None

            mid = memory.store(
                cue_emb, target_emb,
                cue_variants=para_embs,
                metadata={"cue": mem_item.cue, "target": mem_item.target,
                          "importance": mem_item.importance},
            )
            print(f" Stored [{mid}]: {mem_item.cue[:40]}... → {mem_item.target[:40]}...")
            if paras:
                print(f" + {len(paras)} paraphrases: {[p[:30] for p in paras]}")

    print(f"\n Total: {memory.stats()}")

    # Phase 2: Recall
    print("\n--- Phase 2: Recall from new queries ---")
    queries = [
        "DB performance is terrible",
        "How to push a new release?",
        "What's the Redis connection info?",
        "The login system has a problem",
        "Need to create a database backup",
        "Where's the deployment config?",
    ]

    for query in queries:
        query_emb = emb(model, query)

        # Single-hop recall
        results = memory.recall(query_emb, top_k=2)

        # Multi-hop
        chain = memory.recall_chain(query_emb, hops=2)

        # Format for context injection. Hoist the seen-id set: the original
        # rebuilt it once per chain element inside the comprehension.
        seen_ids = {r.memory_id for r in results}
        all_results = results + [r for r in chain if r.memory_id not in seen_ids]
        context = format_recalled_memories(all_results)

        print(f"\n Query: \"{query}\"")
        if results:
            print(f" Top result: {results[0].metadata.get('target', '?')[:60]}...")
            print(f" Similarity: {results[0].similarity:.3f}")
        if chain and len(chain) > 1:
            print(f" Chain hop 2: {chain[1].metadata.get('target', '?')[:60]}...")
        if context:
            print(f" Context injection:\n {context.replace(chr(10), chr(10) + ' ')}")
|
||||
|
||||
|
||||
def test_llm_live(model):
    """Test with live LLM if available."""
    # NOTE(review): `model` is unused in this test — kept for signature
    # parity with the other test entry points called from main().
    print("\n=== Test 4: Live LLM Integration ===\n")

    llm = LLMClient()
    if not llm.available:
        print(" LLM Gateway not available. Skipping live test.")
        print(" To test: ensure https://ste-jarvis.tiktok-row.net/llm/v1 is reachable")
        return

    # Test extraction
    user_msg = "The payment webhook keeps failing with a 502 error"
    assistant_msg = "The webhook endpoint at /api/payments/webhook is behind nginx. Check if the upstream timeout is too short — payment processing can take up to 30 seconds."

    extracted = extract_memories_llm(llm, user_msg, assistant_msg)
    print(f" Extracted {len(extracted)} memories from live LLM:")
    for item in extracted:
        print(f" CUE: {item.cue} | TARGET: {item.target[:60]}... | IMP: {item.importance}")

    # Test paraphrase
    if extracted:
        first_cue = extracted[0].cue
        variants = generate_paraphrases_llm(llm, first_cue, n=3)
        print(f"\n Paraphrases for '{first_cue}':")
        for variant in variants:
            print(f" → {variant}")
|
||||
|
||||
|
||||
def main():
    """Run all Experiment P0 tests in order: heuristics first, then the
    end-to-end pipeline, then the (optional) live-LLM test."""
    banner = "=" * 60
    print(banner)
    print("Experiment P0: LLM Integration")
    print(banner)

    encoder = load_model()
    test_heuristic_extraction()
    test_heuristic_paraphrases()
    test_end_to_end(encoder)
    test_llm_live(encoder)
|
||||
|
||||
|
||||
# Script entry point: run the full experiment when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user