NuoNuo: Hippocampal memory module prototype
Hopfield + Hebbian hybrid memory system for LLMs. Two nights of experiments (16 iterations), validated on LongMemEval (ICLR 2025). Architecture: - Single-hop: Two-Stage Hopfield (NN top-20 → softmax settle) - Multi-hop: Hebbian W matrix with WTA pattern separation - 64% on LongMemEval (500 questions), retrieval-only, no LLM dependency - 4ms latency @ 20K memories, ~1GB VRAM Key findings: - Hopfield attention solved noise tolerance (20% → 100% vs flat Hebbian) - WTA pattern separation enables 20K+ capacity - Multi-hop associative chains (6 hops, CosSim=1.0) — RAG can't do this - MiniLM-L6 is optimal (discrimination gap > absolute similarity) - Paraphrase cue augmentation: 55% → 100% on synthetic, 36% → 64% on benchmark - SNN encoder viable (CosSim 0.99) but not needed for current architecture
This commit is contained in:
213
experiments/exp08_llm_integration.py
Normal file
213
experiments/exp08_llm_integration.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""Experiment P0: LLM Integration — end-to-end memory-augmented conversation.
|
||||
|
||||
Tests:
|
||||
1. Memory extraction (heuristic fallback since LLM gateway is down)
|
||||
2. Paraphrase generation (heuristic fallback)
|
||||
3. End-to-end: conversation → extract → store → recall → inject
|
||||
4. Multi-turn conversation simulation
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from nuonuo.hippocampus import HippocampalMemory
|
||||
from llm import (LLMClient, extract_memories_heuristic, extract_memories_llm,
|
||||
generate_paraphrases_heuristic, generate_paraphrases_llm,
|
||||
format_recalled_memories)
|
||||
|
||||
DEVICE = "cuda"
|
||||
|
||||
|
||||
def load_model(model_name: str = "all-MiniLM-L6-v2"):
    """Load the sentence-embedding model onto DEVICE.

    The import is deferred so the heavy sentence_transformers dependency
    is only paid when an encoder is actually needed.

    Args:
        model_name: Hugging Face model id. Defaults to MiniLM-L6, the
            encoder the experiments were validated with (see commit notes:
            "MiniLM-L6 is optimal").

    Returns:
        A SentenceTransformer instance placed on DEVICE.
    """
    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(model_name, device=DEVICE)
|
||||
|
||||
|
||||
def emb(model, text):
    """Embed a single string; returns its 1-D L2-normalized embedding tensor."""
    batch = model.encode(
        [text],
        convert_to_tensor=True,
        normalize_embeddings=True,
        device=DEVICE,
    )
    # encode() returns a batch of one; unwrap to the single vector.
    return batch[0]
|
||||
|
||||
|
||||
def test_heuristic_extraction():
    """Test memory extraction without LLM.

    Runs the heuristic extractor over a mix of technical turns (which
    should yield memories) and small talk (which should yield none),
    printing whatever was extracted from each exchange.
    """
    print("=== Test 1: Heuristic Memory Extraction ===\n")

    conversations = [
        ("How do I deploy to production?",
         "Use the blue-green deployment pipeline via GitHub Actions. The config is in .github/workflows/deploy.yml"),
        ("The database is really slow today",
         "Check for missing indexes on the users table. Last time this happened it was the created_at column."),
        ("Hi, how are you?",
         "I'm doing well, thanks!"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug please",
         "The auth service uses JWT tokens with 24h expiry stored in Redis. The bug was in token refresh logic."),
    ]

    for user_msg, assistant_msg in conversations:
        memories = extract_memories_heuristic(user_msg, assistant_msg)
        print(f" User: {user_msg[:50]}...")
        if memories:
            for m in memories:
                print(f" → CUE: {m.cue[:40]}... | TARGET: {m.target[:50]}... | IMP: {m.importance}")
        else:
            # Was an f-string with no placeholders (ruff F541); plain literal.
            print(" → (nothing extracted)")
        print()
||||
|
||||
|
||||
def test_heuristic_paraphrases():
    """Test paraphrase generation without LLM."""
    print("=== Test 2: Heuristic Paraphrase Generation ===\n")

    samples = (
        "How do I deploy to production?",
        "The database is slow",
        "Can you fix the authentication bug?",
        "I need to configure nginx",
        "Let's set up monitoring for the server",
    )

    # Print each original followed by its three heuristic rewrites.
    for original in samples:
        variants = generate_paraphrases_heuristic(original, n=3)
        print(f" Original: {original}")
        for variant in variants:
            print(f" → {variant}")
        print()
||||
|
||||
|
||||
def test_end_to_end(model):
    """Full pipeline: conversation → extract → store → recall → inject.

    Phase 1 extracts memories from simulated conversation turns (via the
    live LLM when the gateway is up, heuristics otherwise), augments each
    cue with paraphrases, embeds everything, and stores it in the
    hippocampal memory. Phase 2 issues fresh queries, performs single-hop
    recall plus a 2-hop associative chain, and formats the merged results
    for context injection.

    Args:
        model: sentence-embedding model passed to ``emb``.
    """
    print("=== Test 3: End-to-End Pipeline ===\n")

    memory = HippocampalMemory(embed_dim=384)
    llm = LLMClient()  # Will fail gracefully if gateway down

    # Simulate a few conversation turns
    turns = [
        ("How do I deploy to production?",
         "Use blue-green deployment via GitHub Actions. Config in .github/workflows/deploy.yml"),
        ("The database is really slow",
         "Check for missing indexes on users table, especially created_at column"),
        ("What port does Redis run on?",
         "Redis is on port 6379 at redis.internal"),
        ("Fix the auth bug",
         "Auth uses JWT tokens with 24h expiry in Redis. Bug was in token refresh."),
        ("How do I backup the database?",
         "Backups run daily at 3am UTC via cron job to S3. Config in /etc/cron.d/db-backup"),
    ]

    # Phase 1: Learn from conversations
    print("--- Phase 1: Learning from conversations ---")
    for user_msg, assistant_msg in turns:
        # Extract memories (LLM path when the gateway is reachable).
        if llm.available:
            memories = extract_memories_llm(llm, user_msg, assistant_msg)
        else:
            memories = extract_memories_heuristic(user_msg, assistant_msg)

        for mem_item in memories:
            # Generate paraphrases to augment the cue (improves recall —
            # see commit notes on paraphrase cue augmentation).
            if llm.available:
                paras = generate_paraphrases_llm(llm, mem_item.cue, n=3)
            else:
                paras = generate_paraphrases_heuristic(mem_item.cue, n=3)

            # Embed and store
            cue_emb = emb(model, mem_item.cue)
            target_emb = emb(model, mem_item.target)
            para_embs = [emb(model, p) for p in paras] if paras else None

            mid = memory.store(
                cue_emb, target_emb,
                cue_variants=para_embs,
                metadata={"cue": mem_item.cue, "target": mem_item.target,
                          "importance": mem_item.importance},
            )
            print(f" Stored [{mid}]: {mem_item.cue[:40]}... → {mem_item.target[:40]}...")
            if paras:
                print(f" + {len(paras)} paraphrases: {[p[:30] for p in paras]}")

    print(f"\n Total: {memory.stats()}")

    # Phase 2: Recall
    print("\n--- Phase 2: Recall from new queries ---")
    queries = [
        "DB performance is terrible",
        "How to push a new release?",
        "What's the Redis connection info?",
        "The login system has a problem",
        "Need to create a database backup",
        "Where's the deployment config?",
    ]

    for query in queries:
        query_emb = emb(model, query)

        # Single-hop recall
        results = memory.recall(query_emb, top_k=2)

        # Multi-hop
        chain = memory.recall_chain(query_emb, hops=2)

        # Format for context injection. Hoist the seen-id set: the original
        # rebuilt it once per chain element inside the comprehension.
        seen_ids = {r.memory_id for r in results}
        all_results = results + [r for r in chain if r.memory_id not in seen_ids]
        context = format_recalled_memories(all_results)

        print(f"\n Query: \"{query}\"")
        if results:
            print(f" Top result: {results[0].metadata.get('target', '?')[:60]}...")
            print(f" Similarity: {results[0].similarity:.3f}")
        if chain and len(chain) > 1:
            print(f" Chain hop 2: {chain[1].metadata.get('target', '?')[:60]}...")
        if context:
            print(f" Context injection:\n {context.replace(chr(10), chr(10) + ' ')}")
|
||||
|
||||
|
||||
def test_llm_live(model):
    """Test with live LLM if available."""
    # NOTE(review): `model` is unused in this test — kept for signature
    # parity with the other test entry points called from main().
    print("\n=== Test 4: Live LLM Integration ===\n")

    llm = LLMClient()
    if not llm.available:
        print(" LLM Gateway not available. Skipping live test.")
        print(" To test: ensure https://ste-jarvis.tiktok-row.net/llm/v1 is reachable")
        return

    # Test extraction
    user_msg = "The payment webhook keeps failing with a 502 error"
    assistant_msg = "The webhook endpoint at /api/payments/webhook is behind nginx. Check if the upstream timeout is too short — payment processing can take up to 30 seconds."

    extracted = extract_memories_llm(llm, user_msg, assistant_msg)
    print(f" Extracted {len(extracted)} memories from live LLM:")
    for item in extracted:
        print(f" CUE: {item.cue} | TARGET: {item.target[:60]}... | IMP: {item.importance}")

    # Test paraphrase
    if extracted:
        first_cue = extracted[0].cue
        variants = generate_paraphrases_llm(llm, first_cue, n=3)
        print(f"\n Paraphrases for '{first_cue}':")
        for variant in variants:
            print(f" → {variant}")
|
||||
|
||||
|
||||
def main():
    """Run all Experiment P0 tests in order: heuristics first, then the
    end-to-end pipeline, then the (optional) live-LLM test."""
    banner = "=" * 60
    print(banner)
    print("Experiment P0: LLM Integration")
    print(banner)

    encoder = load_model()
    test_heuristic_extraction()
    test_heuristic_paraphrases()
    test_end_to_end(encoder)
    test_llm_live(encoder)
|
||||
|
||||
|
||||
# Script entry point: run the full experiment when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user