diff --git a/app/src/components/intelligence/MemoryTreeStatusPanel.test.tsx b/app/src/components/intelligence/MemoryTreeStatusPanel.test.tsx index 0a4d4d5731..bc1d275e34 100644 --- a/app/src/components/intelligence/MemoryTreeStatusPanel.test.tsx +++ b/app/src/components/intelligence/MemoryTreeStatusPanel.test.tsx @@ -345,6 +345,69 @@ describe('', () => { ); }); }); + + // ── #002 (T018): degraded status + first-blocking-cause banner ────────── + + it('renders the first-blocking-cause remediation banner with a degraded recall badge', async () => { + mockPipelineStatus.mockResolvedValueOnce( + payload({ + status: 'degraded', + reason: 'semantic recall disabled', + first_blocking_cause: { + code: 'embeddings_unconfigured', + class: 'unrecoverable', + remediation_key: 'memory.health.remediation.embeddings_unconfigured', + }, + degraded: { semantic_recall: true, structure: false }, + }) + ); + render(); + + await waitFor(() => { + expect(screen.getByTestId('memory-tree-status-label')).toHaveTextContent(/degraded/i); + }); + + // The remediation text comes from the i18n key the core supplied. + const remediation = screen.getByTestId('memory-tree-blocking-cause-remediation'); + expect(remediation).toHaveTextContent(/embeddings provider is configured/i); + // Recall badge present, structure badge absent. 
+ expect(screen.getByTestId('memory-tree-badge-recall')).toBeInTheDocument(); + expect(screen.queryByTestId('memory-tree-badge-structure')).not.toBeInTheDocument(); + }); + + it('shows the structure badge when only extraction is degraded', async () => { + mockPipelineStatus.mockResolvedValueOnce( + payload({ + status: 'degraded', + first_blocking_cause: { + code: 'extraction_timeout', + class: 'unrecoverable', + remediation_key: 'memory.health.remediation.extraction_timeout', + }, + degraded: { semantic_recall: false, structure: true }, + }) + ); + render(); + + await waitFor(() => { + expect(screen.getByTestId('memory-tree-blocking-cause')).toBeInTheDocument(); + }); + expect(screen.getByTestId('memory-tree-badge-structure')).toBeInTheDocument(); + expect(screen.queryByTestId('memory-tree-badge-recall')).not.toBeInTheDocument(); + expect(screen.getByTestId('memory-tree-blocking-cause-remediation')).toHaveTextContent( + /extraction model is timing out/i + ); + }); + + it('does not render the blocking-cause banner on a healthy pipeline', async () => { + mockPipelineStatus.mockResolvedValueOnce(payload({ status: 'running' })); + render(); + + await waitFor(() => { + expect(screen.getByTestId('memory-tree-status-label')).toHaveTextContent(/running/i); + }); + expect(screen.queryByTestId('memory-tree-blocking-cause')).not.toBeInTheDocument(); + }); }); describe('integration health helpers', () => { diff --git a/app/src/components/intelligence/MemoryTreeStatusPanel.tsx b/app/src/components/intelligence/MemoryTreeStatusPanel.tsx index c8bdf2e039..879b61d36d 100644 --- a/app/src/components/intelligence/MemoryTreeStatusPanel.tsx +++ b/app/src/components/intelligence/MemoryTreeStatusPanel.tsx @@ -187,6 +187,9 @@ function statusDotClass(kind: MemoryTreePipelineStatus['status']): string { return 'bg-stone-400 dark:bg-neutral-500'; case 'error': return 'bg-coral-500'; + case 'degraded': + // Amber: the pipeline is running but recall/structure is reduced. 
+ return 'bg-amber-500'; case 'idle': default: return 'bg-stone-400 dark:bg-neutral-500'; @@ -360,12 +363,21 @@ export function MemoryTreeStatusPanel({ onToast }: MemoryTreeStatusPanelProps) { return t('memoryTree.status.statusSyncing'); case 'error': return t('memoryTree.status.statusError'); + case 'degraded': + return t('memoryTree.status.statusDegraded'); case 'idle': default: return t('memoryTree.status.statusIdle'); } })(); + // #002 (FR-004): the single first blocking cause, rendered verbatim with a + // localized remediation. Prefer the explicit `first_blocking_cause`; fall + // back to the active degradation cause so older payload shapes still surface + // something actionable. + const blockingCause = status?.first_blocking_cause ?? status?.degraded?.cause ?? null; + const degraded = status?.degraded; + const checked = !(status?.is_paused ?? false); const tileClass = @@ -400,6 +412,37 @@ export function MemoryTreeStatusPanel({ onToast }: MemoryTreeStatusPanelProps) { ) : null} + {/* #002 (FR-004): actionable first-blocking-cause banner. Shown when the + core reports a typed cause — names the problem + the fix instead of a + generic "error". Degraded badges below distinguish recall vs structure. */} + {!loading && blockingCause ? ( +
+
+ {t(blockingCause.remediation_key, t('memory.health.remediation.unknown'))} +
+ {degraded?.semantic_recall || degraded?.structure ? ( +
+ {degraded?.semantic_recall ? ( + + {t('memoryTree.status.degradedRecall')} + + ) : null} + {degraded?.structure ? ( + + {t('memoryTree.status.degradedStructure')} + + ) : null} +
+ ) : null} +
+ ) : null} +
{/* Status tile ── color-coded pill */}
@@ -463,6 +506,20 @@ export function MemoryTreeStatusPanel({ onToast }: MemoryTreeStatusPanelProps) {
+ {/* #002 (FR-010 / US5): extraction coverage. Only meaningful once chunks + exist; near-0% with chunks present means the wiki is built but has no + structure (the extraction model is failing). */} + {!loading && status && status.total_chunks > 0 && status.extraction_coverage != null ? ( +
+ {t('memoryTree.status.extractionCoverage').replace( + '{pct}', + String(Math.round((status.extraction_coverage ?? 0) * 100)) + )} +
+ ) : null} + {/* Auto-sync toggle row — markup mirrors AIPanel's inline ToggleRow */} diff --git a/app/src/lib/i18n/ar.ts b/app/src/lib/i18n/ar.ts index 77e77ddcb4..4cbac6c841 100644 --- a/app/src/lib/i18n/ar.ts +++ b/app/src/lib/i18n/ar.ts @@ -4390,6 +4390,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'رفض التخزين المحلي', 'pages.settings.account.security': 'الأمان', 'pages.settings.account.securityDesc': 'وضع تخزين الأسرار وحالة سلسلة المفاتيح', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'متدهور', + 'memoryTree.status.degradedRecall': 'الاسترجاع الدلالي معطّل', + 'memoryTree.status.degradedStructure': 'بنية الويكي غير مكتملة', + 'memoryTree.status.extractionCoverage': 'تغطية الاستخراج: {pct}% من الأجزاء لها بنية', + 'memory.health.remediation.budget_exhausted': + 'استنفدت تضمينات الذاكرة الميزانية المُدارة. أعدّ تضمينات Ollama المحلية (الإعدادات → الذكاء الاصطناعي → التضمينات) أو أضف مفتاح API الخاص بك للتضمينات لمواصلة بناء الذاكرة.', + 'memory.health.remediation.auth_missing': + 'لم يتم العثور على بيانات اعتماد التضمينات. سجّل الدخول إلى OpenHuman، أو أعدّ تضمينات Ollama المحلية في الإعدادات → الذكاء الاصطناعي → التضمينات.', + 'memory.health.remediation.auth_invalid': + 'تم رفض بيانات اعتماد التضمينات الخاصة بك. أعد المصادقة، أو بدّل إلى تضمينات Ollama المحلية في الإعدادات → الذكاء الاصطناعي → التضمينات.', + 'memory.health.remediation.embeddings_unconfigured': + 'لم يتم تكوين أي مزوّد تضمينات، لذا فإن الاسترجاع الدلالي معطّل. أعدّ تضمينات Ollama المحلية (موصى به) أو أضف مفتاح تضمينات في الإعدادات → الذكاء الاصطناعي → التضمينات.', + 'memory.health.remediation.embedding_dim_mismatch': + 'يعيد نموذج التضمين حجم متجه خاطئًا (تتوقع الذاكرة 1024 بُعدًا). اختر نموذجًا بـ 1024 بُعدًا، أو اطلب 1024 بُعدًا من مزوّدك.', + 'memory.health.remediation.local_model_unavailable': + 'نموذج محلي مطلوب غير متوفر. 
ثبّت/شغّل Ollama ونزّل النموذج، أو بدّل هذا الحِمل إلى مزوّد سحابي في الإعدادات → الذكاء الاصطناعي.', + 'memory.health.remediation.extraction_timeout': + 'يتجاوز نموذج استخراج الذاكرة المهلة الزمنية، لذا فإن بنية الويكي قليلة. بدّل نموذج استخراج الذاكرة إلى نموذج أسرع في الإعدادات → الذكاء الاصطناعي.', + 'memory.health.remediation.summarizer_unavailable': + 'لا يتوفر مزوّد تلخيص لميزة إنشاء أشجار التلخيص. فعّل الذكاء الاصطناعي المحلي (Ollama)، أو فعّل تلخيص السحابة في الإعدادات → الذكاء الاصطناعي → الذاكرة.', + 'memory.health.remediation.transient': + 'حدث خطأ مؤقت أدى إلى مقاطعة معالجة الذاكرة. ستتم إعادة المحاولة تلقائيًا.', + 'memory.health.remediation.unknown': + 'واجهت معالجة الذاكرة مشكلة. تحقق من الإعدادات → الذكاء الاصطناعي للتكوين.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'الملف: {title}', 'chat.artifact.generating': 'جارٍ إنشاء {kind}…', diff --git a/app/src/lib/i18n/bn.ts b/app/src/lib/i18n/bn.ts index 44d109a6df..1b47ffee82 100644 --- a/app/src/lib/i18n/bn.ts +++ b/app/src/lib/i18n/bn.ts @@ -4468,6 +4468,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'স্থানীয় সঞ্চয়স্থান প্রত্যাখ্যান করুন', 'pages.settings.account.security': 'নিরাপত্তা', 'pages.settings.account.securityDesc': 'গোপনীয়তা সঞ্চয়স্থান মোড এবং কিচেন অবস্থা', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. 
+ 'memoryTree.status.statusDegraded': 'অবনমিত', + 'memoryTree.status.degradedRecall': 'সিম্যান্টিক রিকল নিষ্ক্রিয়', + 'memoryTree.status.degradedStructure': 'উইকি কাঠামো অসম্পূর্ণ', + 'memoryTree.status.extractionCoverage': 'এক্সট্র্যাকশন কভারেজ: {pct}% অংশের কাঠামো আছে', + 'memory.health.remediation.budget_exhausted': + 'মেমরি এমবেডিং পরিচালিত বাজেটে পৌঁছেছে। স্থানীয় Ollama এমবেডিং সেট আপ করুন (সেটিংস → AI → এমবেডিংস) অথবা মেমরি তৈরি চালিয়ে যেতে আপনার নিজস্ব এমবেডিং API কী যোগ করুন।', + 'memory.health.remediation.auth_missing': + 'কোনও এমবেডিং শংসাপত্র পাওয়া যায়নি। OpenHuman-এ লগ ইন করুন, অথবা সেটিংস → AI → এমবেডিংস-এ স্থানীয় Ollama এমবেডিং সেট আপ করুন।', + 'memory.health.remediation.auth_invalid': + 'আপনার এমবেডিং শংসাপত্র প্রত্যাখ্যাত হয়েছে। পুনরায় প্রমাণীকরণ করুন, অথবা সেটিংস → AI → এমবেডিংস-এ স্থানীয় Ollama এমবেডিং-এ স্যুইচ করুন।', + 'memory.health.remediation.embeddings_unconfigured': + 'কোনও এমবেডিং প্রদানকারী কনফিগার করা নেই, তাই সিম্যান্টিক রিকল বন্ধ। স্থানীয় Ollama এমবেডিং সেট আপ করুন (প্রস্তাবিত) অথবা সেটিংস → AI → এমবেডিংস-এ একটি এমবেডিং কী যোগ করুন।', + 'memory.health.remediation.embedding_dim_mismatch': + 'এমবেডিং মডেল ভুল ভেক্টর আকার ফেরত দেয় (মেমরি 1024 মাত্রা প্রত্যাশা করে)। 1024-মাত্রার একটি মডেল বেছে নিন, অথবা আপনার প্রদানকারীর কাছে 1024 মাত্রা অনুরোধ করুন।', + 'memory.health.remediation.local_model_unavailable': + 'একটি প্রয়োজনীয় স্থানীয় মডেল উপলব্ধ নেই। Ollama ইনস্টল/চালু করুন এবং মডেলটি ডাউনলোড করুন, অথবা সেটিংস → AI-তে এই কাজের চাপ একটি ক্লাউড প্রদানকারীতে স্যুইচ করুন।', + 'memory.health.remediation.extraction_timeout': + 'মেমরি এক্সট্র্যাকশন মডেল টাইম আউট হচ্ছে, তাই উইকিতে সামান্য কাঠামো আছে। সেটিংস → AI-তে মেমরি এক্সট্র্যাকশন মডেল একটি দ্রুততর মডেলে পরিবর্তন করুন।', + 'memory.health.remediation.summarizer_unavailable': + 'সারাংশ ট্রি তৈরির জন্য কোনও সারাংশ প্রদানকারী উপলব্ধ নেই। স্থানীয় AI (Ollama) সক্ষম করুন, অথবা সেটিংস → AI → মেমরিতে ক্লাউড সারাংশ সক্ষম করুন।', + 'memory.health.remediation.transient': + 'একটি অস্থায়ী 
ত্রুটি মেমরি প্রক্রিয়াকরণে বাধা দিয়েছে। স্বয়ংক্রিয়ভাবে পুনরায় চেষ্টা করা হবে।', + 'memory.health.remediation.unknown': + 'মেমরি প্রক্রিয়াকরণে একটি সমস্যা হয়েছে। কনফিগারেশনের জন্য সেটিংস → AI পরীক্ষা করুন।', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'আর্টিফ্যাক্ট: {title}', 'chat.artifact.generating': '{kind} তৈরি হচ্ছে…', diff --git a/app/src/lib/i18n/de.ts b/app/src/lib/i18n/de.ts index 2d4a1e8861..f80c69f583 100644 --- a/app/src/lib/i18n/de.ts +++ b/app/src/lib/i18n/de.ts @@ -4585,6 +4585,32 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Lokalen Speicher ablehnen', 'pages.settings.account.security': 'Sicherheit', 'pages.settings.account.securityDesc': 'Geheimnisspeicher-Modus und Schlüsselbund-Status', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Eingeschränkt', + 'memoryTree.status.degradedRecall': 'Semantische Suche deaktiviert', + 'memoryTree.status.degradedStructure': 'Wiki-Struktur unvollständig', + 'memoryTree.status.extractionCoverage': + 'Extraktionsabdeckung: {pct}% der Abschnitte haben Struktur', + 'memory.health.remediation.budget_exhausted': + 'Die Speicher-Embeddings haben das verwaltete Budget erreicht. Richte lokale Ollama-Embeddings ein (Einstellungen → KI → Einbettungen) oder füge deinen eigenen Embeddings-API-Schlüssel hinzu, um den Speicher weiter aufzubauen.', + 'memory.health.remediation.auth_missing': + 'Keine Embeddings-Anmeldedaten gefunden. Melde dich bei OpenHuman an oder richte lokale Ollama-Embeddings unter Einstellungen → KI → Einbettungen ein.', + 'memory.health.remediation.auth_invalid': + 'Deine Embeddings-Anmeldedaten wurden abgelehnt. Authentifiziere dich erneut oder wechsle unter Einstellungen → KI → Einbettungen zu lokalen Ollama-Embeddings.', + 'memory.health.remediation.embeddings_unconfigured': + 'Es ist kein Embeddings-Anbieter konfiguriert, daher ist die semantische Suche deaktiviert. 
Richte lokale Ollama-Embeddings ein (empfohlen) oder füge unter Einstellungen → KI → Einbettungen einen Embeddings-Schlüssel hinzu.', + 'memory.health.remediation.embedding_dim_mismatch': + 'Das Embedding-Modell liefert die falsche Vektorgröße (der Speicher erwartet 1024 Dimensionen). Wähle ein Modell mit 1024 Dimensionen oder fordere 1024 Dimensionen von deinem Anbieter an.', + 'memory.health.remediation.local_model_unavailable': + 'Ein erforderliches lokales Modell ist nicht verfügbar. Installiere/starte Ollama und lade das Modell herunter, oder wechsle diese Arbeitslast unter Einstellungen → KI zu einem Cloud-Anbieter.', + 'memory.health.remediation.extraction_timeout': + 'Das Modell zur Speicherextraktion überschreitet die Zeit, daher hat das Wiki wenig Struktur. Wechsle das Modell für die Speicherextraktion unter Einstellungen → KI zu einem schnelleren.', + 'memory.health.remediation.summarizer_unavailable': + 'Für „Zusammenfassungsbäume erstellen“ ist kein Zusammenfassungsanbieter verfügbar. Aktiviere die lokale KI (Ollama) oder aktiviere die Cloud-Zusammenfassung unter Einstellungen → KI → Speicher.', + 'memory.health.remediation.transient': + 'Ein vorübergehender Fehler hat die Speicherverarbeitung unterbrochen. Es wird automatisch erneut versucht.', + 'memory.health.remediation.unknown': + 'Bei der Speicherverarbeitung ist ein Problem aufgetreten. 
Überprüfe Einstellungen → KI für die Konfiguration.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefakt: {title}', 'chat.artifact.generating': 'Erstelle {kind}…', diff --git a/app/src/lib/i18n/en.ts b/app/src/lib/i18n/en.ts index 1af41bc9c0..8598d607fb 100644 --- a/app/src/lib/i18n/en.ts +++ b/app/src/lib/i18n/en.ts @@ -504,7 +504,34 @@ const en: TranslationMap = { 'memoryTree.status.statusSyncing': 'Syncing', 'memoryTree.status.statusError': 'Error', 'memoryTree.status.statusIdle': 'Idle', + 'memoryTree.status.statusDegraded': 'Degraded', 'memoryTree.status.never': 'Never', + // #002: degraded badges + typed remediation strings. The Rust core sends a + // `remediation_key` (one of memory.health.remediation.*) which the status + // panel resolves verbatim, so the cause + fix come from one source of truth. + 'memoryTree.status.degradedRecall': 'Semantic recall disabled', + 'memoryTree.status.degradedStructure': 'Wiki structure incomplete', + 'memoryTree.status.extractionCoverage': 'Extraction coverage: {pct}% of chunks have structure', + 'memory.health.remediation.budget_exhausted': + 'Memory embeddings hit the managed budget. Set up local Ollama embeddings (Settings → AI → Embeddings) or add your own embeddings API key to keep building memory.', + 'memory.health.remediation.auth_missing': + 'No embeddings credentials found. Log in to OpenHuman, or set up local Ollama embeddings in Settings → AI → Embeddings.', + 'memory.health.remediation.auth_invalid': + 'Your embeddings credentials were rejected. Re-authenticate, or switch to local Ollama embeddings in Settings → AI → Embeddings.', + 'memory.health.remediation.embeddings_unconfigured': + 'No embeddings provider is configured, so semantic recall is off. 
Set up local Ollama embeddings (recommended) or add an embeddings key in Settings → AI → Embeddings.', + 'memory.health.remediation.embedding_dim_mismatch': + 'The embedding model returns the wrong vector size (memory expects 1024 dimensions). Pick a 1024-dim model, or request 1024 dimensions for your provider.', + 'memory.health.remediation.local_model_unavailable': + 'A required local model is not available. Install/run Ollama and pull the model, or switch this workload to a cloud provider in Settings → AI.', + 'memory.health.remediation.extraction_timeout': + 'The memory extraction model is timing out, so the wiki has little structure. Switch the Memory extraction model to a faster one in Settings → AI.', + 'memory.health.remediation.summarizer_unavailable': + 'No summarization provider is available for Build Summary Trees. Enable local AI (Ollama), or enable cloud summarization in Settings → AI → Memory.', + 'memory.health.remediation.transient': + 'A temporary error interrupted memory processing. It will retry automatically.', + 'memory.health.remediation.unknown': + 'Memory processing encountered an issue. Check Settings → AI for configuration.', 'memoryTree.status.fetchError': "Couldn't fetch Memory Tree status", 'memoryTree.status.retry': 'Retry', 'memoryTree.status.toggleFailed': "Couldn't toggle auto-sync", diff --git a/app/src/lib/i18n/es.ts b/app/src/lib/i18n/es.ts index f211536bc4..bdea137c8c 100644 --- a/app/src/lib/i18n/es.ts +++ b/app/src/lib/i18n/es.ts @@ -4551,6 +4551,32 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Rechazar almacenamiento local', 'pages.settings.account.security': 'Seguridad', 'pages.settings.account.securityDesc': 'Modo de almacenamiento de secretos y estado del llavero', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. 
+ 'memoryTree.status.statusDegraded': 'Degradado', + 'memoryTree.status.degradedRecall': 'Recuperación semántica desactivada', + 'memoryTree.status.degradedStructure': 'Estructura de la wiki incompleta', + 'memoryTree.status.extractionCoverage': + 'Cobertura de extracción: {pct}% de los fragmentos tienen estructura', + 'memory.health.remediation.budget_exhausted': + 'Los embeddings de memoria agotaron el presupuesto gestionado. Configura embeddings locales de Ollama (Configuración → IA → Incrustaciones) o añade tu propia clave de API de embeddings para seguir construyendo la memoria.', + 'memory.health.remediation.auth_missing': + 'No se encontraron credenciales de embeddings. Inicia sesión en OpenHuman o configura embeddings locales de Ollama en Configuración → IA → Incrustaciones.', + 'memory.health.remediation.auth_invalid': + 'Tus credenciales de embeddings fueron rechazadas. Vuelve a autenticarte o cambia a embeddings locales de Ollama en Configuración → IA → Incrustaciones.', + 'memory.health.remediation.embeddings_unconfigured': + 'No hay ningún proveedor de embeddings configurado, por lo que la recuperación semántica está desactivada. Configura embeddings locales de Ollama (recomendado) o añade una clave de embeddings en Configuración → IA → Incrustaciones.', + 'memory.health.remediation.embedding_dim_mismatch': + 'El modelo de embeddings devuelve un tamaño de vector incorrecto (la memoria espera 1024 dimensiones). Elige un modelo de 1024 dimensiones o solicita 1024 dimensiones a tu proveedor.', + 'memory.health.remediation.local_model_unavailable': + 'No hay disponible un modelo local requerido. Instala/ejecuta Ollama y descarga el modelo, o cambia esta carga de trabajo a un proveedor en la nube en Configuración → IA.', + 'memory.health.remediation.extraction_timeout': + 'El modelo de extracción de memoria está agotando el tiempo de espera, por lo que la wiki tiene poca estructura. 
Cambia el modelo de extracción de memoria por uno más rápido en Configuración → IA.', + 'memory.health.remediation.summarizer_unavailable': + 'No hay ningún proveedor de resúmenes disponible para Crear árboles de resumen. Activa la IA local (Ollama) o activa el resumen en la nube en Configuración → IA → Memoria.', + 'memory.health.remediation.transient': + 'Un error temporal interrumpió el procesamiento de la memoria. Se reintentará automáticamente.', + 'memory.health.remediation.unknown': + 'El procesamiento de la memoria encontró un problema. Comprueba Configuración → IA para la configuración.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefacto: {title}', 'chat.artifact.generating': 'Generando {kind}…', diff --git a/app/src/lib/i18n/fr.ts b/app/src/lib/i18n/fr.ts index b5009d3284..b590c16df6 100644 --- a/app/src/lib/i18n/fr.ts +++ b/app/src/lib/i18n/fr.ts @@ -4567,6 +4567,32 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Refuser le stockage local', 'pages.settings.account.security': 'Sécurité', 'pages.settings.account.securityDesc': 'Mode de stockage des secrets et état du trousseau', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Dégradé', + 'memoryTree.status.degradedRecall': 'Rappel sémantique désactivé', + 'memoryTree.status.degradedStructure': 'Structure du wiki incomplète', + 'memoryTree.status.extractionCoverage': + "Couverture d'extraction : {pct}% des fragments ont une structure", + 'memory.health.remediation.budget_exhausted': + "Les embeddings de mémoire ont atteint le budget géré. Configurez des embeddings Ollama locaux (Paramètres → IA → Encastrements) ou ajoutez votre propre clé d'API d'embeddings pour continuer à construire la mémoire.", + 'memory.health.remediation.auth_missing': + "Aucune information d'identification d'embeddings trouvée. 
Connectez-vous à OpenHuman ou configurez des embeddings Ollama locaux dans Paramètres → IA → Encastrements.", + 'memory.health.remediation.auth_invalid': + "Vos informations d'identification d'embeddings ont été rejetées. Authentifiez-vous à nouveau ou passez aux embeddings Ollama locaux dans Paramètres → IA → Encastrements.", + 'memory.health.remediation.embeddings_unconfigured': + "Aucun fournisseur d'embeddings n'est configuré, le rappel sémantique est donc désactivé. Configurez des embeddings Ollama locaux (recommandé) ou ajoutez une clé d'embeddings dans Paramètres → IA → Encastrements.", + 'memory.health.remediation.embedding_dim_mismatch': + "Le modèle d'embeddings renvoie une taille de vecteur incorrecte (la mémoire attend 1024 dimensions). Choisissez un modèle à 1024 dimensions ou demandez 1024 dimensions à votre fournisseur.", + 'memory.health.remediation.local_model_unavailable': + "Un modèle local requis n'est pas disponible. Installez/lancez Ollama et téléchargez le modèle, ou basculez cette charge de travail vers un fournisseur cloud dans Paramètres → IA.", + 'memory.health.remediation.extraction_timeout': + "Le modèle d'extraction de mémoire dépasse le délai imparti, le wiki a donc peu de structure. Choisissez un modèle d'extraction de mémoire plus rapide dans Paramètres → IA.", + 'memory.health.remediation.summarizer_unavailable': + "Aucun fournisseur de résumé n'est disponible pour Créer des arbres de résumé. Activez l'IA locale (Ollama) ou activez la synthèse cloud dans Paramètres → IA → Mémoire.", + 'memory.health.remediation.transient': + 'Une erreur temporaire a interrompu le traitement de la mémoire. Une nouvelle tentative aura lieu automatiquement.', + 'memory.health.remediation.unknown': + 'Le traitement de la mémoire a rencontré un problème. 
Vérifiez Paramètres → IA pour la configuration.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefact : {title}', 'chat.artifact.generating': 'Génération de {kind}…', diff --git a/app/src/lib/i18n/hi.ts b/app/src/lib/i18n/hi.ts index bbe664e1fb..fd4e4fc1e6 100644 --- a/app/src/lib/i18n/hi.ts +++ b/app/src/lib/i18n/hi.ts @@ -4475,6 +4475,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'स्थानीय भंडारण अस्वीकार करें', 'pages.settings.account.security': 'सुरक्षा', 'pages.settings.account.securityDesc': 'रहस्य भंडारण मोड और कीचेन स्थिति', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'अवक्रमित', + 'memoryTree.status.degradedRecall': 'सिमेंटिक रिकॉल अक्षम', + 'memoryTree.status.degradedStructure': 'विकी संरचना अधूरी', + 'memoryTree.status.extractionCoverage': 'एक्सट्रैक्शन कवरेज: {pct}% खंडों में संरचना है', + 'memory.health.remediation.budget_exhausted': + 'मेमोरी एम्बेडिंग प्रबंधित बजट तक पहुँच गई। स्थानीय Ollama एम्बेडिंग सेट करें (सेटिंग्स → AI → एम्बेडिंग्स) या मेमोरी बनाना जारी रखने के लिए अपनी स्वयं की एम्बेडिंग API कुंजी जोड़ें।', + 'memory.health.remediation.auth_missing': + 'कोई एम्बेडिंग क्रेडेंशियल नहीं मिला। OpenHuman में लॉग इन करें, या सेटिंग्स → AI → एम्बेडिंग्स में स्थानीय Ollama एम्बेडिंग सेट करें।', + 'memory.health.remediation.auth_invalid': + 'आपके एम्बेडिंग क्रेडेंशियल अस्वीकार कर दिए गए। फिर से प्रमाणित करें, या सेटिंग्स → AI → एम्बेडिंग्स में स्थानीय Ollama एम्बेडिंग पर स्विच करें।', + 'memory.health.remediation.embeddings_unconfigured': + 'कोई एम्बेडिंग प्रदाता कॉन्फ़िगर नहीं किया गया है, इसलिए सिमेंटिक रिकॉल बंद है। स्थानीय Ollama एम्बेडिंग सेट करें (अनुशंसित) या सेटिंग्स → AI → एम्बेडिंग्स में एम्बेडिंग कुंजी जोड़ें।', + 'memory.health.remediation.embedding_dim_mismatch': + 'एम्बेडिंग मॉडल गलत वेक्टर आकार लौटाता है (मेमोरी को 1024 आयाम अपेक्षित हैं)। 1024-आयाम वाला मॉडल चुनें, या अपने प्रदाता से 1024 आयाम का अनुरोध करें।', + 
'memory.health.remediation.local_model_unavailable': + 'एक आवश्यक स्थानीय मॉडल उपलब्ध नहीं है। Ollama इंस्टॉल/चलाएँ और मॉडल डाउनलोड करें, या सेटिंग्स → AI में इस वर्कलोड को क्लाउड प्रदाता पर स्विच करें।', + 'memory.health.remediation.extraction_timeout': + 'मेमोरी एक्सट्रैक्शन मॉडल टाइम आउट हो रहा है, इसलिए विकी में बहुत कम संरचना है। सेटिंग्स → AI में मेमोरी एक्सट्रैक्शन मॉडल को तेज़ मॉडल में बदलें।', + 'memory.health.remediation.summarizer_unavailable': + 'सारांश ट्री बनाएँ के लिए कोई सारांश प्रदाता उपलब्ध नहीं है। स्थानीय AI (Ollama) सक्षम करें, या सेटिंग्स → AI → मेमोरी में क्लाउड सारांश सक्षम करें।', + 'memory.health.remediation.transient': + 'एक अस्थायी त्रुटि ने मेमोरी प्रोसेसिंग को बाधित किया। स्वचालित रूप से पुनः प्रयास किया जाएगा।', + 'memory.health.remediation.unknown': + 'मेमोरी प्रोसेसिंग में एक समस्या आई। कॉन्फ़िगरेशन के लिए सेटिंग्स → AI जाँचें।', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'आर्टिफैक्ट: {title}', 'chat.artifact.generating': '{kind} बना रहा है…', diff --git a/app/src/lib/i18n/id.ts b/app/src/lib/i18n/id.ts index d61c8237e1..3448a3a27f 100644 --- a/app/src/lib/i18n/id.ts +++ b/app/src/lib/i18n/id.ts @@ -4485,6 +4485,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Tolak penyimpanan lokal', 'pages.settings.account.security': 'Keamanan', 'pages.settings.account.securityDesc': 'Mode penyimpanan rahasia dan status keychain', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Terdegradasi', + 'memoryTree.status.degradedRecall': 'Recall semantik dinonaktifkan', + 'memoryTree.status.degradedStructure': 'Struktur wiki tidak lengkap', + 'memoryTree.status.extractionCoverage': 'Cakupan ekstraksi: {pct}% bagian memiliki struktur', + 'memory.health.remediation.budget_exhausted': + 'Embedding memori mencapai batas anggaran terkelola. 
Siapkan embedding Ollama lokal (Pengaturan → AI → Sematan) atau tambahkan kunci API embedding Anda sendiri untuk terus membangun memori.', + 'memory.health.remediation.auth_missing': + 'Kredensial embedding tidak ditemukan. Masuk ke OpenHuman, atau siapkan embedding Ollama lokal di Pengaturan → AI → Sematan.', + 'memory.health.remediation.auth_invalid': + 'Kredensial embedding Anda ditolak. Autentikasi ulang, atau beralih ke embedding Ollama lokal di Pengaturan → AI → Sematan.', + 'memory.health.remediation.embeddings_unconfigured': + 'Tidak ada penyedia embedding yang dikonfigurasi, sehingga recall semantik nonaktif. Siapkan embedding Ollama lokal (disarankan) atau tambahkan kunci embedding di Pengaturan → AI → Sematan.', + 'memory.health.remediation.embedding_dim_mismatch': + 'Model embedding mengembalikan ukuran vektor yang salah (memori mengharapkan 1024 dimensi). Pilih model 1024 dimensi, atau minta 1024 dimensi dari penyedia Anda.', + 'memory.health.remediation.local_model_unavailable': + 'Model lokal yang diperlukan tidak tersedia. Instal/jalankan Ollama dan unduh model, atau alihkan beban kerja ini ke penyedia cloud di Pengaturan → AI.', + 'memory.health.remediation.extraction_timeout': + 'Model ekstraksi memori kehabisan waktu, sehingga wiki memiliki sedikit struktur. Ganti model ekstraksi memori ke yang lebih cepat di Pengaturan → AI.', + 'memory.health.remediation.summarizer_unavailable': + 'Tidak ada penyedia ringkasan yang tersedia untuk Buat Pohon Ringkasan. Aktifkan AI lokal (Ollama), atau aktifkan ringkasan cloud di Pengaturan → AI → Memori.', + 'memory.health.remediation.transient': + 'Kesalahan sementara mengganggu pemrosesan memori. Akan dicoba lagi secara otomatis.', + 'memory.health.remediation.unknown': + 'Pemrosesan memori mengalami masalah. 
Periksa Pengaturan → AI untuk konfigurasi.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefak: {title}', 'chat.artifact.generating': 'Membuat {kind}…', diff --git a/app/src/lib/i18n/it.ts b/app/src/lib/i18n/it.ts index 2990b3546a..f440ee059c 100644 --- a/app/src/lib/i18n/it.ts +++ b/app/src/lib/i18n/it.ts @@ -4543,6 +4543,32 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Rifiuta archiviazione locale', 'pages.settings.account.security': 'Sicurezza', 'pages.settings.account.securityDesc': 'Modalità archiviazione segreti e stato del portachiavi', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Degradato', + 'memoryTree.status.degradedRecall': 'Richiamo semantico disattivato', + 'memoryTree.status.degradedStructure': 'Struttura del wiki incompleta', + 'memoryTree.status.extractionCoverage': + 'Copertura di estrazione: {pct}% dei frammenti ha una struttura', + 'memory.health.remediation.budget_exhausted': + 'Gli embedding della memoria hanno raggiunto il budget gestito. Configura embedding Ollama locali (Impostazioni → IA → Incorporamenti) o aggiungi la tua chiave API per gli embedding per continuare a costruire la memoria.', + 'memory.health.remediation.auth_missing': + 'Nessuna credenziale per gli embedding trovata. Accedi a OpenHuman o configura embedding Ollama locali in Impostazioni → IA → Incorporamenti.', + 'memory.health.remediation.auth_invalid': + 'Le tue credenziali per gli embedding sono state rifiutate. Autenticati di nuovo o passa agli embedding Ollama locali in Impostazioni → IA → Incorporamenti.', + 'memory.health.remediation.embeddings_unconfigured': + 'Nessun provider di embedding è configurato, quindi il richiamo semantico è disattivato. 
Configura embedding Ollama locali (consigliato) o aggiungi una chiave per gli embedding in Impostazioni → IA → Incorporamenti.', + 'memory.health.remediation.embedding_dim_mismatch': + 'Il modello di embedding restituisce una dimensione del vettore errata (la memoria prevede 1024 dimensioni). Scegli un modello a 1024 dimensioni o richiedi 1024 dimensioni al tuo provider.', + 'memory.health.remediation.local_model_unavailable': + 'Un modello locale richiesto non è disponibile. Installa/avvia Ollama e scarica il modello, oppure passa questo carico di lavoro a un provider cloud in Impostazioni → IA.', + 'memory.health.remediation.extraction_timeout': + 'Il modello di estrazione della memoria sta andando in timeout, quindi il wiki ha poca struttura. Passa a un modello di estrazione della memoria più veloce in Impostazioni → IA.', + 'memory.health.remediation.summarizer_unavailable': + "Nessun provider di riepilogo è disponibile per Crea alberi di riepilogo. Abilita l'IA locale (Ollama) o abilita il riepilogo cloud in Impostazioni → IA → Memoria.", + 'memory.health.remediation.transient': + "Un errore temporaneo ha interrotto l'elaborazione della memoria. Verrà riprovato automaticamente.", + 'memory.health.remediation.unknown': + "L'elaborazione della memoria ha riscontrato un problema. Controlla Impostazioni → IA per la configurazione.", // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefatto: {title}', 'chat.artifact.generating': 'Generazione {kind}…', diff --git a/app/src/lib/i18n/ko.ts b/app/src/lib/i18n/ko.ts index b3b8b685b7..a69915d859 100644 --- a/app/src/lib/i18n/ko.ts +++ b/app/src/lib/i18n/ko.ts @@ -4432,6 +4432,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': '로컬 저장소 거부', 'pages.settings.account.security': '보안', 'pages.settings.account.securityDesc': '비밀 저장 모드 및 키체인 상태', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. 
+ 'memoryTree.status.statusDegraded': '저하됨', + 'memoryTree.status.degradedRecall': '의미 기반 검색 비활성화됨', + 'memoryTree.status.degradedStructure': '위키 구조 불완전', + 'memoryTree.status.extractionCoverage': '추출 범위: 청크의 {pct}%에 구조가 있음', + 'memory.health.remediation.budget_exhausted': + '메모리 임베딩이 관리형 예산에 도달했습니다. 로컬 Ollama 임베딩을 설정하거나(설정 → AI → 임베딩) 메모리를 계속 구축하려면 자체 임베딩 API 키를 추가하세요.', + 'memory.health.remediation.auth_missing': + '임베딩 자격 증명을 찾을 수 없습니다. OpenHuman에 로그인하거나 설정 → AI → 임베딩에서 로컬 Ollama 임베딩을 설정하세요.', + 'memory.health.remediation.auth_invalid': + '임베딩 자격 증명이 거부되었습니다. 다시 인증하거나 설정 → AI → 임베딩에서 로컬 Ollama 임베딩으로 전환하세요.', + 'memory.health.remediation.embeddings_unconfigured': + '구성된 임베딩 제공자가 없어 의미 기반 검색이 꺼져 있습니다. 로컬 Ollama 임베딩을 설정하거나(권장) 설정 → AI → 임베딩에서 임베딩 키를 추가하세요.', + 'memory.health.remediation.embedding_dim_mismatch': + '임베딩 모델이 잘못된 벡터 크기를 반환합니다(메모리는 1024차원을 예상함). 1024차원 모델을 선택하거나 제공자에게 1024차원을 요청하세요.', + 'memory.health.remediation.local_model_unavailable': + '필요한 로컬 모델을 사용할 수 없습니다. Ollama를 설치/실행하고 모델을 다운로드하거나, 설정 → AI에서 이 작업을 클라우드 제공자로 전환하세요.', + 'memory.health.remediation.extraction_timeout': + '메모리 추출 모델이 시간 초과되어 위키 구조가 거의 없습니다. 설정 → AI에서 메모리 추출 모델을 더 빠른 것으로 변경하세요.', + 'memory.health.remediation.summarizer_unavailable': + '요약 트리 만들기에 사용할 수 있는 요약 제공자가 없습니다. 로컬 AI(Ollama)를 활성화하거나, 설정 → AI → 메모리에서 클라우드 요약을 활성화하세요.', + 'memory.health.remediation.transient': + '일시적인 오류로 메모리 처리가 중단되었습니다. 자동으로 다시 시도됩니다.', + 'memory.health.remediation.unknown': + '메모리 처리 중 문제가 발생했습니다. 
설정 → AI에서 구성을 확인하세요.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': '아티팩트: {title}', 'chat.artifact.generating': '{kind} 생성 중…', diff --git a/app/src/lib/i18n/pl.ts b/app/src/lib/i18n/pl.ts index 47adfed35f..5585948548 100644 --- a/app/src/lib/i18n/pl.ts +++ b/app/src/lib/i18n/pl.ts @@ -4542,6 +4542,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Odmów lokalnego przechowywania', 'pages.settings.account.security': 'Bezpieczeństwo', 'pages.settings.account.securityDesc': 'Tryb przechowywania sekretów i stan pęku kluczy', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Ograniczony', + 'memoryTree.status.degradedRecall': 'Wyszukiwanie semantyczne wyłączone', + 'memoryTree.status.degradedStructure': 'Struktura wiki niekompletna', + 'memoryTree.status.extractionCoverage': 'Pokrycie ekstrakcji: {pct}% fragmentów ma strukturę', + 'memory.health.remediation.budget_exhausted': + 'Osadzenia pamięci wyczerpały zarządzany budżet. Skonfiguruj lokalne osadzenia Ollama (Ustawienia → AI → Embeddings) lub dodaj własny klucz API osadzeń, aby kontynuować budowanie pamięci.', + 'memory.health.remediation.auth_missing': + 'Nie znaleziono poświadczeń osadzeń. Zaloguj się do OpenHuman lub skonfiguruj lokalne osadzenia Ollama w Ustawienia → AI → Embeddings.', + 'memory.health.remediation.auth_invalid': + 'Twoje poświadczenia osadzeń zostały odrzucone. Uwierzytelnij się ponownie lub przełącz na lokalne osadzenia Ollama w Ustawienia → AI → Embeddings.', + 'memory.health.remediation.embeddings_unconfigured': + 'Nie skonfigurowano dostawcy osadzeń, więc wyszukiwanie semantyczne jest wyłączone. Skonfiguruj lokalne osadzenia Ollama (zalecane) lub dodaj klucz osadzeń w Ustawienia → AI → Embeddings.', + 'memory.health.remediation.embedding_dim_mismatch': + 'Model osadzeń zwraca nieprawidłowy rozmiar wektora (pamięć oczekuje 1024 wymiarów). 
Wybierz model o 1024 wymiarach lub poproś dostawcę o 1024 wymiary.', + 'memory.health.remediation.local_model_unavailable': + 'Wymagany model lokalny jest niedostępny. Zainstaluj/uruchom Ollama i pobierz model albo przełącz to zadanie na dostawcę chmurowego w Ustawienia → AI.', + 'memory.health.remediation.extraction_timeout': + 'Model ekstrakcji pamięci przekracza limit czasu, więc wiki ma niewielką strukturę. Zmień model ekstrakcji pamięci na szybszy w Ustawienia → AI.', + 'memory.health.remediation.summarizer_unavailable': + 'Brak dostępnego dostawcy podsumowań dla funkcji Twórz drzewa podsumowań. Włącz lokalną AI (Ollama) lub włącz podsumowywanie w chmurze w Ustawienia → AI → Pamięć.', + 'memory.health.remediation.transient': + 'Tymczasowy błąd przerwał przetwarzanie pamięci. Ponowna próba nastąpi automatycznie.', + 'memory.health.remediation.unknown': + 'Przetwarzanie pamięci napotkało problem. Sprawdź Ustawienia → AI w celu konfiguracji.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefakt: {title}', 'chat.artifact.generating': 'Tworzenie {kind}…', diff --git a/app/src/lib/i18n/pt.ts b/app/src/lib/i18n/pt.ts index 48a84e1a0e..9c8a335256 100644 --- a/app/src/lib/i18n/pt.ts +++ b/app/src/lib/i18n/pt.ts @@ -4541,6 +4541,32 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Recusar armazenamento local', 'pages.settings.account.security': 'Segurança', 'pages.settings.account.securityDesc': 'Modo de armazenamento de segredos e status do chaveiro', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. 
+ 'memoryTree.status.statusDegraded': 'Degradado', + 'memoryTree.status.degradedRecall': 'Recuperação semântica desativada', + 'memoryTree.status.degradedStructure': 'Estrutura do wiki incompleta', + 'memoryTree.status.extractionCoverage': + 'Cobertura de extração: {pct}% dos fragmentos têm estrutura', + 'memory.health.remediation.budget_exhausted': + 'Os embeddings de memória atingiram o orçamento gerenciado. Configure embeddings locais do Ollama (Configurações → IA → Incorporações) ou adicione sua própria chave de API de embeddings para continuar construindo a memória.', + 'memory.health.remediation.auth_missing': + 'Nenhuma credencial de embeddings encontrada. Faça login no OpenHuman ou configure embeddings locais do Ollama em Configurações → IA → Incorporações.', + 'memory.health.remediation.auth_invalid': + 'Suas credenciais de embeddings foram rejeitadas. Autentique-se novamente ou mude para embeddings locais do Ollama em Configurações → IA → Incorporações.', + 'memory.health.remediation.embeddings_unconfigured': + 'Nenhum provedor de embeddings está configurado, então a recuperação semântica está desativada. Configure embeddings locais do Ollama (recomendado) ou adicione uma chave de embeddings em Configurações → IA → Incorporações.', + 'memory.health.remediation.embedding_dim_mismatch': + 'O modelo de embeddings retorna o tamanho de vetor errado (a memória espera 1024 dimensões). Escolha um modelo de 1024 dimensões ou solicite 1024 dimensões ao seu provedor.', + 'memory.health.remediation.local_model_unavailable': + 'Um modelo local necessário não está disponível. Instale/execute o Ollama e baixe o modelo, ou mude esta carga de trabalho para um provedor de nuvem em Configurações → IA.', + 'memory.health.remediation.extraction_timeout': + 'O modelo de extração de memória está expirando o tempo limite, então o wiki tem pouca estrutura. 
Mude o modelo de extração de memória para um mais rápido em Configurações → IA.', + 'memory.health.remediation.summarizer_unavailable': + 'Nenhum provedor de resumo está disponível para Criar árvores de resumo. Ative a IA local (Ollama) ou ative o resumo na nuvem em Configurações → IA → Memória.', + 'memory.health.remediation.transient': + 'Um erro temporário interrompeu o processamento da memória. Será repetido automaticamente.', + 'memory.health.remediation.unknown': + 'O processamento da memória encontrou um problema. Verifique Configurações → IA para a configuração.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Artefato: {title}', 'chat.artifact.generating': 'Gerando {kind}…', diff --git a/app/src/lib/i18n/ru.ts b/app/src/lib/i18n/ru.ts index ab7ba95e13..ae550fdba2 100644 --- a/app/src/lib/i18n/ru.ts +++ b/app/src/lib/i18n/ru.ts @@ -4511,6 +4511,31 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': 'Отклонить локальное хранилище', 'pages.settings.account.security': 'Безопасность', 'pages.settings.account.securityDesc': 'Режим хранения секретов и статус связки ключей', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': 'Ухудшено', + 'memoryTree.status.degradedRecall': 'Семантический поиск отключён', + 'memoryTree.status.degradedStructure': 'Структура вики неполная', + 'memoryTree.status.extractionCoverage': 'Охват извлечения: {pct}% фрагментов имеют структуру', + 'memory.health.remediation.budget_exhausted': + 'Эмбеддинги памяти исчерпали управляемый бюджет. Настройте локальные эмбеддинги Ollama (Настройки → ИИ → Эмбеддинги) или добавьте свой ключ API для эмбеддингов, чтобы продолжить построение памяти.', + 'memory.health.remediation.auth_missing': + 'Учётные данные для эмбеддингов не найдены. 
Войдите в OpenHuman или настройте локальные эмбеддинги Ollama в разделе Настройки → ИИ → Эмбеддинги.', + 'memory.health.remediation.auth_invalid': + 'Ваши учётные данные для эмбеддингов отклонены. Пройдите аутентификацию заново или переключитесь на локальные эмбеддинги Ollama в разделе Настройки → ИИ → Эмбеддинги.', + 'memory.health.remediation.embeddings_unconfigured': + 'Поставщик эмбеддингов не настроен, поэтому семантический поиск отключён. Настройте локальные эмбеддинги Ollama (рекомендуется) или добавьте ключ эмбеддингов в разделе Настройки → ИИ → Эмбеддинги.', + 'memory.health.remediation.embedding_dim_mismatch': + 'Модель эмбеддингов возвращает неверный размер вектора (память ожидает 1024 измерения). Выберите модель с 1024 измерениями или запросите 1024 измерения у своего поставщика.', + 'memory.health.remediation.local_model_unavailable': + 'Требуемая локальная модель недоступна. Установите/запустите Ollama и загрузите модель либо переключите эту задачу на облачного поставщика в разделе Настройки → ИИ.', + 'memory.health.remediation.extraction_timeout': + 'Модель извлечения памяти превышает время ожидания, поэтому в вики мало структуры. Выберите более быструю модель извлечения памяти в разделе Настройки → ИИ.', + 'memory.health.remediation.summarizer_unavailable': + 'Нет доступного поставщика суммаризации для «Построить деревья сводок». Включите локальный ИИ (Ollama) или включите облачную суммаризацию в разделе Настройки → ИИ → Память.', + 'memory.health.remediation.transient': + 'Временная ошибка прервала обработку памяти. Повтор произойдёт автоматически.', + 'memory.health.remediation.unknown': + 'При обработке памяти возникла проблема. 
Проверьте конфигурацию в разделе Настройки → ИИ.', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': 'Артефакт: {title}', 'chat.artifact.generating': 'Создание {kind}…', diff --git a/app/src/lib/i18n/zh-CN.ts b/app/src/lib/i18n/zh-CN.ts index e9456e62f9..802395e954 100644 --- a/app/src/lib/i18n/zh-CN.ts +++ b/app/src/lib/i18n/zh-CN.ts @@ -4253,6 +4253,29 @@ const messages: TranslationMap = { 'keyring.settings.revokeConsent': '拒绝本地存储', 'pages.settings.account.security': '安全', 'pages.settings.account.securityDesc': '密钥存储模式和密钥链状态', + // #002 memory-pipeline-hardening: degraded badges + typed remediation. + 'memoryTree.status.statusDegraded': '已降级', + 'memoryTree.status.degradedRecall': '语义召回已禁用', + 'memoryTree.status.degradedStructure': 'Wiki 结构不完整', + 'memoryTree.status.extractionCoverage': '提取覆盖率:{pct}% 的片段具有结构', + 'memory.health.remediation.budget_exhausted': + '记忆嵌入已达到托管预算上限。请设置本地 Ollama 嵌入(设置 → AI → 向量嵌入),或添加你自己的嵌入 API 密钥以继续构建记忆。', + 'memory.health.remediation.auth_missing': + '未找到嵌入凭据。请登录 OpenHuman,或在设置 → AI → 向量嵌入 中设置本地 Ollama 嵌入。', + 'memory.health.remediation.auth_invalid': + '你的嵌入凭据被拒绝。请重新进行身份验证,或在设置 → AI → 向量嵌入 中切换到本地 Ollama 嵌入。', + 'memory.health.remediation.embeddings_unconfigured': + '未配置嵌入提供方,因此语义召回已关闭。请设置本地 Ollama 嵌入(推荐),或在设置 → AI → 向量嵌入 中添加嵌入密钥。', + 'memory.health.remediation.embedding_dim_mismatch': + '嵌入模型返回的向量大小不正确(记忆需要 1024 维)。请选择 1024 维的模型,或向你的提供方请求 1024 维。', + 'memory.health.remediation.local_model_unavailable': + '所需的本地模型不可用。请安装/运行 Ollama 并拉取模型,或在设置 → AI 中将此工作负载切换到云提供方。', + 'memory.health.remediation.extraction_timeout': + '记忆提取模型超时,因此 Wiki 结构很少。请在设置 → AI 中将记忆提取模型更换为更快的模型。', + 'memory.health.remediation.summarizer_unavailable': + '没有可用于构建摘要树的摘要提供方。请启用本地 AI(Ollama),或在设置 → AI → 记忆中启用云端摘要。', + 'memory.health.remediation.transient': '临时错误中断了记忆处理。将自动重试。', + 'memory.health.remediation.unknown': '记忆处理遇到问题。请在设置 → AI 中检查配置。', // Chat — agent-generated artifacts (#2779) 'chat.artifact.aria': '工件:{title}', 'chat.artifact.generating': 
'正在生成{kind}…', diff --git a/app/src/utils/tauriCommands/memoryTree.ts b/app/src/utils/tauriCommands/memoryTree.ts index 34104c3bc2..27390ec69d 100644 --- a/app/src/utils/tauriCommands/memoryTree.ts +++ b/app/src/utils/tauriCommands/memoryTree.ts @@ -753,7 +753,54 @@ export async function memoryTreeBackfillStatus(): Promise { * verbatim to a colored pill in the status panel — `paused` is the only * state the toggle directly influences. */ -export type MemoryTreePipelineStatusKind = 'running' | 'paused' | 'syncing' | 'error' | 'idle'; +export type MemoryTreePipelineStatusKind = + | 'running' + | 'paused' + | 'syncing' + | 'error' + | 'idle' + | 'degraded'; + +/** + * Stable typed failure codes the Rust `health::FailureCode` emits (#002). The + * UI maps each to a localized remediation string; `remediation_key` carries + * the i18n key directly so the panel renders the core's guidance verbatim. + */ +export type MemoryTreeFailureCode = + | 'budget_exhausted' + | 'auth_missing' + | 'auth_invalid' + | 'embeddings_unconfigured' + | 'embedding_dim_mismatch' + | 'local_model_unavailable' + | 'extraction_timeout' + | 'summarizer_unavailable' + | 'transient'; + +/** + * Typed pipeline failure (#002 FR-004). Mirrors Rust `health::PipelineFailure`. + * `remediation_key` is an i18n key (e.g. `memory.health.remediation.*`); the UI + * resolves it via `useT()`. `detail` is a short non-localized diagnostic + * string (never a secret) for logs/tooltips. + */ +export interface MemoryTreePipelineFailure { + code: MemoryTreeFailureCode; + class: 'transient' | 'unrecoverable'; + remediation_key: string; + detail?: string; +} + +/** + * "The pipeline ran but output quality is reduced" (#002 FR-002/FR-005). + * Mirrors Rust `health::DegradedState`. `semantic_recall` true when embeddings + * were skipped (no usable provider → recall falls back to recency); + * `structure` true when extraction yielded nothing across the board. 
+ */ +export interface MemoryTreeDegradedState { + semantic_recall: boolean; + structure: boolean; + cause?: MemoryTreePipelineFailure | null; +} /** * Per-state job counters returned in {@link MemoryTreePipelineStatus}. Mirrors @@ -795,6 +842,24 @@ export interface MemoryTreePipelineStatus { is_syncing: boolean; /** Convenience flag: scheduler-gate mode is `off`. */ is_paused: boolean; + /** + * #002 (FR-002/FR-005): degradation snapshot. Optional for back-compat with + * older cores that don't emit it (the Rust field is `#[serde(default)]`); + * absent ⇒ treat as not degraded. + */ + degraded?: MemoryTreeDegradedState; + /** + * #002 (FR-004): the single first blocking/most-significant cause, rendered + * verbatim by the panel (resolving `remediation_key`). `null`/absent when + * the pipeline is healthy. + */ + first_blocking_cause?: MemoryTreePipelineFailure | null; + /** + * #002 (FR-010 / US5): fraction of chunks with ≥1 indexed entity, in + * `[0.0, 1.0]`. Near 0 with `total_chunks > 0` ⇒ extraction is producing no + * structure ("empty-but-built wiki"). Optional for back-compat. + */ + extraction_coverage?: number | null; } /** diff --git a/src/openhuman/about_app/catalog_data.rs b/src/openhuman/about_app/catalog_data.rs index 96c8fd735a..5fb7e5afeb 100644 --- a/src/openhuman/about_app/catalog_data.rs +++ b/src/openhuman/about_app/catalog_data.rs @@ -326,6 +326,16 @@ pub(super) const CAPABILITIES: &[Capability] = &[ status: CapabilityStatus::Beta, privacy: LOCAL_RAW, }, + Capability { + id: "intelligence.memory_pipeline_doctor", + name: "Memory Pipeline Doctor", + domain: "intelligence", + category: CapabilityCategory::Intelligence, + description: "Diagnose why the memory tree / wiki is empty or stalled. Walks each pipeline stage (embeddings config, scheduler gate, job queue, extraction/recall degradation, summary-tree precondition) and reports the single first blocking cause with an actionable fix, plus counters and extraction coverage. 
The agent can run it on itself; a typed 'first blocking cause' is surfaced in the Memory status panel, and jobs that failed under a now-fixed config can be requeued on demand via the `memory_tree_retry_failed` RPC.", + how_to: "Memory status panel shows the cause + fix; or ask the agent to diagnose memory; or `openhuman-core` RPC `memory_tree_doctor`", + status: CapabilityStatus::Beta, + privacy: LOCAL_RAW, + }, Capability { id: "intelligence.github_repo_memory_source", name: "GitHub Repo Memory Source", diff --git a/src/openhuman/config/schema/load.rs b/src/openhuman/config/schema/load.rs index 1749a0c3c2..8ca98c2322 100644 --- a/src/openhuman/config/schema/load.rs +++ b/src/openhuman/config/schema/load.rs @@ -1976,6 +1976,12 @@ impl Config { }; } + if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION") { + if let Some(val) = parse_env_bool("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION", &raw) { + self.memory_tree.cloud_summarization_opt_in = val; + } + } + // Auto-update overrides if let Some(flag) = env.get("OPENHUMAN_AUTO_UPDATE_ENABLED") { let normalized = flag.trim().to_ascii_lowercase(); diff --git a/src/openhuman/config/schema/storage_memory.rs b/src/openhuman/config/schema/storage_memory.rs index 8439c80e07..541bb760e0 100644 --- a/src/openhuman/config/schema/storage_memory.rs +++ b/src/openhuman/config/schema/storage_memory.rs @@ -347,6 +347,16 @@ pub struct MemoryTreeConfig { /// Env override: `OPENHUMAN_MEMORY_TREE_SMART_WALK_MODEL`. #[serde(default)] pub smart_walk_model: Option, + + /// Explicit opt-in to cloud-based summarization when local AI is disabled. + /// + /// Default `false` — "Build Summary Trees" was local-only before #002. + /// Enabling this routes workspace memory summaries to the configured cloud + /// provider. Set to `true` via Settings → AI → Memory or the env var + /// `OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION=true` to acknowledge that + /// memory content will be sent to an external service. 
+ #[serde(default)] + pub cloud_summarization_opt_in: bool, } /// Returns `None` so that existing installs that never opted into Phase 4 @@ -448,6 +458,7 @@ impl Default for MemoryTreeConfig { llm_backend: default_llm_backend(), cloud_llm_model: default_cloud_llm_model(), smart_walk_model: None, + cloud_summarization_opt_in: false, } } } diff --git a/src/openhuman/embeddings/factory.rs b/src/openhuman/embeddings/factory.rs index 6a2726a3e2..25bb698977 100644 --- a/src/openhuman/embeddings/factory.rs +++ b/src/openhuman/embeddings/factory.rs @@ -10,6 +10,16 @@ use super::provider_trait::EmbeddingProvider; use super::voyage::VoyageEmbedding; use super::{NoopEmbedding, OllamaEmbedding, OpenAiEmbedding}; +/// Whether to send the OpenAI `dimensions` request-body parameter for this +/// model. Only the `text-embedding-3-*` family honors it (it's how 3-large is +/// pinned to 1024 = `EMBEDDING_DIM`). Sending it to other models or to +/// arbitrary OpenAI-compatible servers (vLLM, text-embeddings-inference, +/// stricter LocalAI builds) makes those servers 400 on an unknown field, so we +/// gate on the model id rather than the provider kind. (Reviewer sanil-23, #3076.) +fn model_supports_dimensions(model: &str) -> bool { + model.starts_with("text-embedding-3-") +} + /// Creates an embedding provider based on the specified name and configuration. 
/// /// Supported provider names: @@ -38,16 +48,17 @@ pub fn create_embedding_provider( let base_url = crate::openhuman::inference::local::ollama_base_url(); Ok(Box::new(OllamaEmbedding::try_new(&base_url, model, dims)?)) } - "openai" => Ok(Box::new(OpenAiEmbedding::new( - "https://api.openai.com", - "", - model, - dims, - ))), + "openai" => Ok(Box::new( + OpenAiEmbedding::new("https://api.openai.com", "", model, dims) + .with_send_dimensions(model_supports_dimensions(model)), + )), "cohere" => Ok(Box::new(CohereEmbedding::new("", model, dims))), name if name.starts_with("custom:") => { let base_url = name.strip_prefix("custom:").unwrap_or(""); - Ok(Box::new(OpenAiEmbedding::new(base_url, "", model, dims))) + Ok(Box::new( + OpenAiEmbedding::new(base_url, "", model, dims) + .with_send_dimensions(model_supports_dimensions(model)), + )) } "none" => Ok(Box::new(NoopEmbedding)), unknown => Err(anyhow::anyhow!( @@ -78,20 +89,24 @@ pub fn create_embedding_provider_with_credentials( let base_url = crate::openhuman::inference::local::ollama_base_url(); Ok(Box::new(OllamaEmbedding::try_new(&base_url, model, dims)?)) } - "openai" => Ok(Box::new(OpenAiEmbedding::new( - "https://api.openai.com", - api_key, - model, - dims, - ))), + "openai" => Ok(Box::new( + OpenAiEmbedding::new("https://api.openai.com", api_key, model, dims) + .with_send_dimensions(model_supports_dimensions(model)), + )), "cohere" => Ok(Box::new(CohereEmbedding::new(api_key, model, dims))), "custom" => { let url = custom_endpoint.unwrap_or(""); - Ok(Box::new(OpenAiEmbedding::new(url, api_key, model, dims))) + Ok(Box::new( + OpenAiEmbedding::new(url, api_key, model, dims) + .with_send_dimensions(model_supports_dimensions(model)), + )) } name if name.starts_with("custom:") => { let url = custom_endpoint.unwrap_or_else(|| name.strip_prefix("custom:").unwrap_or("")); - Ok(Box::new(OpenAiEmbedding::new(url, api_key, model, dims))) + Ok(Box::new( + OpenAiEmbedding::new(url, api_key, model, dims) + 
.with_send_dimensions(model_supports_dimensions(model)), + )) } "none" => Ok(Box::new(NoopEmbedding)), unknown => Err(anyhow::anyhow!( diff --git a/src/openhuman/embeddings/mod.rs b/src/openhuman/embeddings/mod.rs index 2ccb769252..8e6fadddce 100644 --- a/src/openhuman/embeddings/mod.rs +++ b/src/openhuman/embeddings/mod.rs @@ -39,11 +39,14 @@ pub use factory::{ create_embedding_provider, create_embedding_provider_with_credentials, default_embedding_provider, default_local_embedding_provider, }; +// #002 FR-015: `rpc::resolve_api_key` is re-exported crate-privately (see below) so the +// memory-tree OpenAI-compat embedder reuses the embeddings RPC's key resolution: one source of truth. pub use noop::NoopEmbedding; pub use ollama::{OllamaEmbedding, DEFAULT_OLLAMA_DIMENSIONS, DEFAULT_OLLAMA_MODEL}; pub use openai::OpenAiEmbedding; pub use provider_trait::{format_embedding_signature, EmbeddingProvider}; pub use rpc::provider_from_config; +pub(crate) use rpc::resolve_api_key; pub use schemas::{ all_controller_schemas as all_embeddings_controller_schemas, all_registered_controllers as all_embeddings_registered_controllers, diff --git a/src/openhuman/embeddings/openai.rs b/src/openhuman/embeddings/openai.rs index 0c85acff4d..a3a289432e 100644 --- a/src/openhuman/embeddings/openai.rs +++ b/src/openhuman/embeddings/openai.rs @@ -14,6 +14,13 @@ pub struct OpenAiEmbedding { api_key: String, model: String, dims: usize, + /// When true, send `"dimensions": dims` in the request body. OpenAI's + /// `text-embedding-3-*` models honour this (Matryoshka — e.g. 3-large can + /// return 1024 instead of its native 3072). Off by default so providers + /// that don't accept the field — Voyage (uses `output_dimension`), Cohere, + /// LocalAI/Ollama — keep working unchanged. Set via + /// [`Self::with_send_dimensions`] for the OpenAI / custom-OpenAI paths. 
+ send_dimensions: bool, } impl OpenAiEmbedding { @@ -24,9 +31,20 @@ impl OpenAiEmbedding { api_key: api_key.to_string(), model: model.to_string(), dims, + send_dimensions: false, } } + /// Opt into sending the OpenAI `dimensions` request parameter so a + /// reducible model (`text-embedding-3-large` / `-3-small`) returns exactly + /// `dims` floats instead of its native size. Only call this for genuine + /// OpenAI / OpenAI-compatible endpoints that implement the parameter — + /// see [`Self::send_dimensions`]. Returns `self` for builder chaining. + pub fn with_send_dimensions(mut self, send: bool) -> Self { + self.send_dimensions = send; + self + } + /// Returns the configured base URL. pub fn base_url(&self) -> &str { &self.base_url @@ -111,10 +129,17 @@ impl EmbeddingProvider for OpenAiEmbedding { self.model, texts.len(), url ); - let body = serde_json::json!({ + let mut body = serde_json::json!({ "model": self.model, "input": texts, }); + // Request a specific output size on OpenAI 3-* models (Matryoshka) so + // the vector matches `dims` (e.g. 3-large → 1024 for the memory tree's + // fixed EMBEDDING_DIM). Gated by `send_dimensions` because Voyage / + // Cohere / LocalAI don't accept this exact field. + if self.send_dimensions && self.dims > 0 { + body["dimensions"] = serde_json::json!(self.dims); + } // Retry loop: handles 429 Too Many Requests and 503 Service Unavailable // with Retry-After–aware exponential backoff. diff --git a/src/openhuman/embeddings/openai_tests.rs b/src/openhuman/embeddings/openai_tests.rs index f77a28eb93..6b1074d4b2 100644 --- a/src/openhuman/embeddings/openai_tests.rs +++ b/src/openhuman/embeddings/openai_tests.rs @@ -186,6 +186,45 @@ async fn embed_sends_auth_header() { p.embed(&["test"]).await.unwrap(); } +// #002: the OpenAI `dimensions` request param. 
Off by default (so Voyage / +// Cohere / Ollama, which don't accept this exact field, keep working); on when the factory opts +// in via `with_send_dimensions(..)`, which it does only for `text-embedding-3-*` model ids. + +#[tokio::test] +async fn embed_sends_dimensions_when_opted_in() { + let app = Router::new().route( + "/v1/embeddings", + post(|Json(body): Json| async move { + assert_eq!( + body["dimensions"], 1024, + "dimensions must be sent so 3-large returns 1024, not its native 3072" + ); + Json(serde_json::json!({ "data": [{ "embedding": vec![0.0_f32; 1024] }] })) + }), + ); + let url = start_mock(app).await; + let p = + OpenAiEmbedding::new(&url, "k", "text-embedding-3-large", 1024).with_send_dimensions(true); + p.embed(&["test"]).await.unwrap(); +} + +#[tokio::test] +async fn embed_omits_dimensions_by_default() { + let app = Router::new().route( + "/v1/embeddings", + post(|Json(body): Json| async move { + assert!( + body.get("dimensions").is_none(), + "dimensions must NOT be sent by default (Voyage/Cohere/Ollama reject it)" + ); + Json(serde_json::json!({ "data": [{ "embedding": [1.0] }] })) + }), + ); + let url = start_mock(app).await; + let p = OpenAiEmbedding::new(&url, "k", "m", 1); // no with_send_dimensions + p.embed(&["test"]).await.unwrap(); +} + #[tokio::test] async fn embed_skips_auth_header_when_key_empty() { let app = Router::new().route( diff --git a/src/openhuman/embeddings/rpc.rs b/src/openhuman/embeddings/rpc.rs index b7df3bb808..c4b5e6ea8f 100644 --- a/src/openhuman/embeddings/rpc.rs +++ b/src/openhuman/embeddings/rpc.rs @@ -401,7 +401,7 @@ pub fn provider_from_config(config: &Config) -> anyhow::Result String { +pub(crate) fn resolve_api_key(config: &Config, provider_name: &str) -> String { let slug = if provider_name.starts_with("custom:") { "custom" } else { diff --git a/src/openhuman/memory/ingest_pipeline.rs b/src/openhuman/memory/ingest_pipeline.rs index aa091acd03..cae9e88d01 100644 --- a/src/openhuman/memory/ingest_pipeline.rs +++ 
b/src/openhuman/memory/ingest_pipeline.rs @@ -503,9 +503,15 @@ mod tests { ); let rows = list_chunks(&cfg, &ListChunksQuery::default()).unwrap(); assert_eq!(rows[0].metadata.source_kind, SourceKind::Chat); + // #002 FR-002: `test_config()` configures NO embeddings provider, so the + // extract handler correctly SKIPS embedding rather than persisting a + // zero-vector that would silently poison semantic recall. The chunk is + // written embedding-less and stays re-embeddable once a provider is set + // up. (With a provider configured the embedding is present — see the + // `build_write_embedder` tests in memory_tree/score/embed/factory.rs.) assert!(get_chunk_embedding(&cfg, &out.chunk_ids[0]) .unwrap() - .is_some()); + .is_none()); } #[tokio::test] diff --git a/src/openhuman/memory/schema.rs b/src/openhuman/memory/schema.rs index b3d7cfc577..710dc83bef 100644 --- a/src/openhuman/memory/schema.rs +++ b/src/openhuman/memory/schema.rs @@ -48,6 +48,8 @@ pub fn all_controller_schemas() -> Vec { schemas("pipeline_status"), schemas("set_enabled"), schemas("smart_walk"), + schemas("doctor"), + schemas("retry_failed"), ] } @@ -143,6 +145,14 @@ pub fn all_registered_controllers() -> Vec { schema: schemas("smart_walk"), handler: handle_smart_walk, }, + RegisteredController { + schema: schemas("doctor"), + handler: handle_doctor, + }, + RegisteredController { + schema: schemas("retry_failed"), + handler: handle_retry_failed, + }, ] } @@ -772,10 +782,13 @@ pub fn schemas(function: &str) -> ControllerSchema { FieldSchema { name: "status", ty: TypeSchema::Enum { - variants: vec!["running", "paused", "syncing", "error", "idle"], + variants: vec![ + "running", "paused", "syncing", "degraded", "error", "idle", + ], }, - comment: "Coarse, UI-shaped status. paused wins over error wins \ - over syncing wins over running wins over idle.", + comment: "Coarse, UI-shaped status. Precedence: paused > error > \ + degraded > syncing > running > idle. 
`degraded` (#002) = \ + the pipeline runs but recall/structure is reduced.", required: true, }, FieldSchema { @@ -824,6 +837,41 @@ pub fn schemas(function: &str) -> ControllerSchema { comment: "True when scheduler-gate mode is `off`.", required: true, }, + FieldSchema { + name: "degraded", + ty: TypeSchema::Json, + comment: "#002 (FR-002/FR-004): object `{ semantic_recall: bool, \ + structure: bool, cause?: PipelineFailure }`. The pipeline \ + ran but output quality is reduced — `semantic_recall` when \ + embeddings were skipped, `structure` when extraction \ + yielded nothing. `cause` is the single precedence-resolved \ + failure (structure over semantic_recall) and is OMITTED \ + when no degradation is active; the recall/structure flags \ + are tracked independently behind it. The object itself is \ + always present (serde default). Distinct from a hard `error`.", + required: true, + }, + FieldSchema { + name: "first_blocking_cause", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "#002 (FR-004): the single most-urgent typed cause as a \ + `PipelineFailure` object `{ code, class, remediation_key }`. \ + A failed job's classified reason wins over a soft \ + degradation cause. null when healthy. The UI resolves \ + `remediation_key` and renders it verbatim.", + required: false, + }, + FieldSchema { + name: "extraction_coverage", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "#002 (FR-010): fraction [0.0, 1.0] of chunks with ≥1 \ + indexed entity. Near 0 with total_chunks > 0 means \ + extraction produces no structure. 
`null` when the metric \ + could not be measured (DB read error) — deliberately \ + distinct from a genuine `0.0` so a broken measurement is \ + never misreported as a structure failure.", + required: false, + }, ], }, "set_enabled" => ControllerSchema { @@ -864,6 +912,73 @@ pub fn schemas(function: &str) -> ControllerSchema { }, ], }, + "doctor" => ControllerSchema { + namespace: NAMESPACE, + function: "doctor", + description: "One-shot Memory pipeline diagnostic (#002). Walks each \ + stage (embeddings config, scheduler gate, job queue, extraction/recall \ + degradation, summary-tree precondition) and returns per-stage health, \ + the single first blocking cause (typed code + i18n remediation key), the \ + degraded snapshot, and counters. Exposed for the agent's self-diagnosis \ + and the CLI; cheap (config + queue counters + degraded flags, no live \ + network probe).", + inputs: vec![], + outputs: vec![ + FieldSchema { + name: "healthy", + ty: TypeSchema::Bool, + comment: "True when no stage is blocking (first_blocking_cause is null).", + required: true, + }, + FieldSchema { + name: "stages", + ty: TypeSchema::Json, + comment: "Ordered array of { stage, ok, failure?, note } — pipeline \ + order, so the first non-ok stage is the first blocking cause.", + required: true, + }, + FieldSchema { + name: "first_blocking_cause", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Typed { code, class, remediation_key, detail? } of the first \ + non-ok stage; null when healthy. Mirrors \ + pipeline_status.first_blocking_cause as an explicit Option.", + required: false, + }, + FieldSchema { + name: "degraded", + ty: TypeSchema::Json, + comment: "{ semantic_recall, structure, cause? } degradation snapshot.", + required: true, + }, + FieldSchema { + name: "counters", + ty: TypeSchema::Json, + comment: "{ total_chunks, jobs_ready, jobs_running, jobs_failed, \ + extraction_coverage: number|null }. 
extraction_coverage \ + is the fraction [0,1] of chunks with ≥1 indexed entity; \ + null when the metric could not be measured (DB error).", + required: true, + }, + ], + }, + "retry_failed" => ControllerSchema { + namespace: NAMESPACE, + function: "retry_failed", + description: "Requeue every terminally-failed mem_tree_jobs row back to \ + `ready` (#002 FR-011) so jobs that failed under a now-fixed config \ + (e.g. after adding an embeddings key) re-run without re-ingesting \ + source data. Resets the attempt budget and clears the typed failure \ + reason. Manual, on-demand retry — there is no automatic \ + requeue-on-sync yet.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "requeued", + ty: TypeSchema::U64, + comment: "Number of failed jobs flipped back to ready for retry.", + required: true, + }], + }, "memory_backfill_status" => ControllerSchema { namespace: NAMESPACE, function: "memory_backfill_status", @@ -1316,6 +1431,20 @@ fn handle_smart_walk(params: Map) -> ControllerFuture { }) } +fn handle_doctor(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::doctor_rpc(&config).await?) + }) +} + +fn handle_retry_failed(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::retry_failed_rpc(&config).await?) 
+ }) +} + fn parse_value(v: Value) -> Result { serde_json::from_value(v).map_err(|e| format!("invalid params: {e}")) } diff --git a/src/openhuman/memory/tools.rs b/src/openhuman/memory/tools.rs index 1e3e8f0a6d..176a5d598d 100644 --- a/src/openhuman/memory/tools.rs +++ b/src/openhuman/memory/tools.rs @@ -1,8 +1,10 @@ +mod doctor; mod forget; mod recall; mod store; pub use crate::openhuman::memory::query::*; +pub use doctor::MemoryDoctorTool; pub use forget::MemoryForgetTool; pub use recall::MemoryRecallTool; pub use store::MemoryStoreTool; diff --git a/src/openhuman/memory/tools/doctor.rs b/src/openhuman/memory/tools/doctor.rs new file mode 100644 index 0000000000..c1071bdf36 --- /dev/null +++ b/src/openhuman/memory/tools/doctor.rs @@ -0,0 +1,96 @@ +//! Agent tool: diagnose the memory pipeline (#002 FR-009). +//! +//! Thin wrapper over [`health::run_doctor`] so the agent can self-diagnose an +//! empty / stalled wiki and tell the user the single first blocking cause + +//! how to fix it — the same report the `memory_tree_doctor` RPC and CLI +//! return. Read-only: takes no arguments and mutates nothing, so it carries no +//! security-gate (matching the read-only memory tools). + +use crate::openhuman::config::Config; +use crate::openhuman::memory_tree::health::async_run_doctor; +use crate::openhuman::tools::traits::{Tool, ToolResult}; +use async_trait::async_trait; +use serde_json::json; +use std::sync::Arc; + +/// Let the agent run the one-shot memory-pipeline diagnostic. +pub struct MemoryDoctorTool { + config: Arc, +} + +impl MemoryDoctorTool { + pub fn new(config: Arc) -> Self { + Self { config } + } +} + +#[async_trait] +impl Tool for MemoryDoctorTool { + fn name(&self) -> &str { + "memory_doctor" + } + + fn description(&self) -> &str { + "Diagnose why the memory tree / wiki is empty or stalled. 
Returns per-stage health \ + (embeddings config, scheduler gate, job queue, extraction/recall degradation, \ + summary-tree precondition), the single first blocking cause with a fix, and current \ + counters. Read-only — takes no arguments." + } + + fn parameters_schema(&self) -> serde_json::Value { + json!({ "type": "object", "properties": {}, "required": [] }) + } + + async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { + let report = async_run_doctor(&self.config).await; + // Serialize the structured report so the model gets the typed stages + + // first_blocking_cause + counters verbatim (it can summarize for the + // user from there). serde of a plain struct can't fail here. + let payload = serde_json::to_string_pretty(&report) + .unwrap_or_else(|e| format!("{{\"error\":\"serialize doctor report: {e}\"}}")); + Ok(ToolResult::success(payload)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn test_config() -> (TempDir, Arc) { + let tmp = TempDir::new().unwrap(); + let mut cfg = Config::default(); + cfg.workspace_dir = tmp.path().to_path_buf(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + (tmp, Arc::new(cfg)) + } + + #[test] + fn name_and_schema() { + let (_tmp, cfg) = test_config(); + let tool = MemoryDoctorTool::new(cfg); + assert_eq!(tool.name(), "memory_doctor"); + // No required args. + assert_eq!(tool.parameters_schema()["required"], json!([])); + } + + #[tokio::test] + async fn execute_returns_a_report_for_a_misconfigured_workspace() { + let _g = crate::openhuman::memory_tree::health::test_guard(); + let (_tmp, cfg) = test_config(); + // No embeddings provider, local AI off → unhealthy with a typed cause. 
+ let tool = MemoryDoctorTool::new(cfg); + let result = tool.execute(json!({})).await.unwrap(); + assert!(!result.is_error); + let out = result.output(); + assert!( + out.contains("\"healthy\""), + "report should serialize: {out}" + ); + assert!( + out.contains("embeddings_unconfigured") || out.contains("\"healthy\": false"), + "misconfigured workspace should surface a blocking cause: {out}" + ); + } +} diff --git a/src/openhuman/memory_queue/handlers/mod.rs b/src/openhuman/memory_queue/handlers/mod.rs index 2192ef1578..e676a89a95 100644 --- a/src/openhuman/memory_queue/handlers/mod.rs +++ b/src/openhuman/memory_queue/handlers/mod.rs @@ -23,7 +23,7 @@ use crate::openhuman::memory_store::content::{ self as content_store, read as content_read, tags as content_tags, }; use crate::openhuman::memory_tree::score; -use crate::openhuman::memory_tree::score::embed::{build_embedder_from_config, pack_checked}; +use crate::openhuman::memory_tree::score::embed::{build_write_embedder, pack_checked}; use crate::openhuman::memory_tree::score::store as score_store; use crate::openhuman::memory_tree::tree::store as summary_store; use crate::openhuman::memory_tree::tree::{LeafRef, TreeFactory}; @@ -120,32 +120,52 @@ async fn handle_extract(config: &Config, job: &Job) -> Result { let scoring_cfg = score::ScoringConfig::from_config(config); let result = score::score_chunk(&chunk_with_body, &scoring_cfg).await?; let chunk_embedding: Option> = if result.kept { - match build_embedder_from_config(config) { - Ok(embedder) => match embedder.embed(&body).await { - Ok(vector) => match pack_checked(&vector) { - Ok(_) => Some(vector), - Err(e) => { - log::warn!( - "[memory::jobs] embed dim check failed chunk_id={} err={e:#} — skipping embedding", - chunk.id - ); - None - } - }, - Err(e) => { - log::warn!( - "[memory::jobs] embed failed chunk_id={} err={e:#} — continuing without embedding", - chunk.id - ); - None - } - }, - Err(e) => { + // #002 (FR-002): when no usable embeddings provider is 
configured the + // write path returns None instead of an InertEmbedder — we SKIP + // embedding (the chunk is persisted embedding-less and re-embeddable + // later) rather than writing a fake all-zero vector that would + // silently poison semantic recall. `build_write_embedder` has already + // marked the process-global semantic-recall degraded flag with a typed + // cause for the status / doctor surface. + match build_write_embedder(config).context("build embedder in extract handler")? { + None => { log::warn!( - "[memory::jobs] build embedder failed err={e:#} — continuing without embedding" + "[memory::jobs] extract chunk_id={} — embeddings unavailable, \ + skipping embed (semantic recall degraded)", + chunk.id ); None } + Some(embedder) => { + // Reuse the body already read — avoid a second disk read. + let vector = match embedder.embed(&body).await { + Ok(v) => v, + Err(e) => { + // #002: classify the embed failure so the worker can + // fail fast on unrecoverable causes (budget/auth/dim) + // and surface a typed reason, instead of burning the + // retry budget. The typed failure is the outer + // (downcast) error; the original chain is context. + let failure = + crate::openhuman::memory_tree::health::classify_embed_error(&e); + return Err(anyhow::Error::new(failure).context(format!( + "embed chunk_id={} in extract handler: {e:#}", + chunk.id + ))); + } + }; + // Preserve the pre-cutover dimension guard (the job fails fast + // on a misconfigured embedder) even though #1574 no longer + // persists the packed blob to the legacy + // `mem_tree_chunks.embedding` column — the vector now goes to + // the per-model sidecar instead. + pack_checked(&vector).with_context(|| { + format!("validate embedding dims for chunk_id={}", chunk.id) + })?; + // A real embed succeeded — recall is healthy again. 
+ crate::openhuman::memory_tree::health::clear_semantic_recall_degraded(); + Some(vector) + } } } else { None @@ -687,8 +707,29 @@ async fn handle_reembed_backfill(config: &Config, job: &Job) -> Result e, + None => { + crate::openhuman::memory_queue::set_backfill_in_progress(false); + log::warn!( + "[memory::jobs] reembed_backfill: sig={active_sig} — no usable embeddings \ + provider, skipping backfill (rows stay re-embeddable; semantic recall degraded)" + ); + return Ok(JobOutcome::Done); + } + }; let mut chunk_vecs: Vec<(String, Vec)> = Vec::new(); for id in &chunk_ids { match content_read::read_chunk_body(config, id) { diff --git a/src/openhuman/memory_queue/handlers/mod_tests.rs b/src/openhuman/memory_queue/handlers/mod_tests.rs index 924fc5df2a..f346ff9203 100644 --- a/src/openhuman/memory_queue/handlers/mod_tests.rs +++ b/src/openhuman/memory_queue/handlers/mod_tests.rs @@ -38,6 +38,8 @@ fn mk_running_job(kind: JobKind, payload_json: String) -> Job { created_at_ms: now_ms, started_at_ms: Some(now_ms), completed_at_ms: None, + failure_reason: None, + failure_class: None, } } @@ -132,7 +134,12 @@ async fn reembed_backfill_repopulates_then_completes() { chunk_id, Chunk, Metadata, SourceKind, SourceRef, }; - let (_tmp, cfg) = test_config(); + let (_tmp, mut cfg) = test_config(); + // Deliberate "none" opt-out → build_write_embedder yields an InertEmbedder + // (correct-dim zero vectors, no network). This test pins backfill + // *mechanics* (worklist → sidecar write → Defer/Done), not embed quality; + // the no-provider skip path is covered separately. 
+ cfg.embeddings_provider = Some("none".to_string()); let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); let chunk = Chunk { id: chunk_id(SourceKind::Chat, "slack:#eng", 0, "reembed-seed"), @@ -212,6 +219,85 @@ async fn reembed_backfill_repopulates_then_completes() { ); } +/// #002 (FR-002) regression gate: when NO usable embeddings provider is +/// configured, the re-embed backfill must SKIP (return `Done`) instead of +/// falling back to an `InertEmbedder` and persisting all-zero vectors that +/// would silently poison semantic recall — the same hazard the extract and +/// seal write paths already guard against. The chunk stays embedding-less at +/// the active signature (re-embeddable once a provider is configured). +#[tokio::test] +async fn reembed_backfill_skips_when_no_provider() { + use crate::openhuman::memory_store::chunks::store::{ + get_chunk_embedding_for_signature, tree_active_signature, upsert_chunks, + upsert_staged_chunks_tx, + }; + use crate::openhuman::memory_store::chunks::types::{ + chunk_id, Chunk, Metadata, SourceKind, SourceRef, + }; + + // Default test config leaves embeddings unconfigured (no endpoint/model, + // provider unset) — the no-provider path build_write_embedder guards. + // + // Hold the shared health test-guard: the no-provider path marks the + // process-global semantic-recall degraded flag, so the guard resets it on + // entry and keeps the signal from leaking into parallel status tests. 
+ let _health_guard = crate::openhuman::memory_tree::health::test_guard(); + let (_tmp, cfg) = test_config(); + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let chunk = Chunk { + id: chunk_id(SourceKind::Chat, "slack:#eng", 0, "no-provider-seed"), + content: "memory content with no embeddings provider configured".into(), + metadata: Metadata { + source_kind: SourceKind::Chat, + source_id: "slack:#eng".into(), + owner: "alice".into(), + timestamp: ts, + time_range: (ts, ts), + tags: vec![], + source_ref: Some(SourceRef::new("slack://x")), + path_scope: None, + }, + token_count: 12, + seq_in_source: 0, + created_at: ts, + partial_message: false, + }; + upsert_chunks(&cfg, &[chunk.clone()]).unwrap(); + let content_root = cfg.memory_tree_content_root(); + std::fs::create_dir_all(&content_root).unwrap(); + let staged = content_store::stage_chunks(&content_root, &[chunk.clone()]).unwrap(); + with_connection(&cfg, |conn| { + let tx = conn.unchecked_transaction()?; + upsert_staged_chunks_tx(&tx, &staged)?; + tx.commit()?; + Ok(()) + }) + .unwrap(); + + let sig = tree_active_signature(&cfg); + let job = mk_running_job( + JobKind::ReembedBackfill, + serde_json::to_string(&ReembedBackfillPayload { + signature: sig.clone(), + }) + .unwrap(), + ); + + // No provider → skip the whole backfill (Done), do NOT write a vector. 
+ let out = handle_reembed_backfill(&cfg, &job).await.unwrap(); + assert_eq!( + out, + JobOutcome::Done, + "no usable provider must skip the backfill, not Defer/embed" + ); + assert!( + get_chunk_embedding_for_signature(&cfg, &chunk.id, &sig) + .unwrap() + .is_none(), + "no zero/inert vector may be persisted when no provider is configured" + ); +} + /// #1574 §6 regression gate: a terminal-failure chunk (its body file is /// missing on disk, despite the metadata row staying staged) is /// persistently tombstoned by `mark_chunk_reembed_skipped` on the first @@ -237,7 +323,11 @@ async fn reembed_backfill_tombstones_orphan_and_terminates() { chunk_id, Chunk, Metadata, SourceKind, SourceRef, }; - let (_tmp, cfg) = test_config(); + let (_tmp, mut cfg) = test_config(); + // Deliberate "none" opt-out → InertEmbedder (zero vectors, no network) so + // the backfill reaches the orphan body-read and tombstones it; this test + // pins the tombstone-and-terminate mechanics, not embed quality. + cfg.embeddings_provider = Some("none".to_string()); let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); let chunk = Chunk { id: chunk_id(SourceKind::Chat, "slack:#eng", 0, "orphan-seed"), diff --git a/src/openhuman/memory_queue/store.rs b/src/openhuman/memory_queue/store.rs index 60fd2fd881..eabafa09b5 100644 --- a/src/openhuman/memory_queue/store.rs +++ b/src/openhuman/memory_queue/store.rs @@ -131,7 +131,8 @@ pub fn claim_next(config: &Config, lock_duration_ms: i64) -> Result> ) RETURNING id, kind, payload_json, dedupe_key, status, attempts, max_attempts, available_at_ms, locked_until_ms, last_error, - created_at_ms, started_at_ms, completed_at_ms", + created_at_ms, started_at_ms, completed_at_ms, + failure_reason, failure_class", params![now_ms, lock_until], row_to_job, ) @@ -196,10 +197,33 @@ pub fn mark_done(config: &Config, job: &Job) -> Result<()> { /// cannot clobber an active lessee's row — rows_affected == 0 is a silent /// no-op. 
pub fn mark_failed(config: &Config, job: &Job, error: &str) -> Result<()> { + mark_failed_typed(config, job, error, None) +} + +/// Like [`mark_failed`], but with an optional typed [`PipelineFailure`] +/// classification (#002). When `failure` is `Some` and **unrecoverable** +/// (budget exhausted, bad key, missing local model, dim mismatch), the job +/// terminates as `failed` **immediately** — no retry budget is burned, since +/// retrying the same input cannot succeed — and the typed +/// `failure_reason` / `failure_class` columns are persisted alongside the +/// freeform `last_error`. Transient classifications (and the untyped `None` +/// case) keep the existing attempts-bounded retry-with-backoff behaviour. +/// +/// The claim-token gate (`attempts` + `started_at_ms`) is preserved on every +/// branch so a stale lessee's settlement remains a silent no-op. +pub fn mark_failed_typed( + config: &Config, + job: &Job, + error: &str, + failure: Option<&crate::openhuman::memory_tree::health::PipelineFailure>, +) -> Result<()> { let job_id = &job.id; let attempts = job.attempts as i64; let max_attempts = job.max_attempts as i64; let claim_started_at = job.started_at_ms; + let unrecoverable = failure.map(|f| f.is_unrecoverable()).unwrap_or(false); + let failure_reason = failure.map(|f| f.code.as_str()); + let failure_class = failure.map(|f| f.class.as_str()); with_connection(config, |conn| { let now_ms = Utc::now().timestamp_millis(); @@ -207,21 +231,34 @@ pub fn mark_failed(config: &Config, job: &Job, error: &str) -> Result<()> { // may carry credential-shaped substrings; scrub before logging, // but keep the original in the DB column for diagnostics. let error_for_log = scrub_for_log(error); - if attempts >= max_attempts { + // Terminal when the retry budget is exhausted OR the failure is + // classified unrecoverable (fail fast — #002 FR-003). 
+ if attempts >= max_attempts || unrecoverable { log::warn!( "[memory::jobs] terminal failure id={job_id} \ - attempts={attempts}/{max_attempts} err={error_for_log}" + attempts={attempts}/{max_attempts} unrecoverable={unrecoverable} \ + reason={failure_reason:?} err={error_for_log}" ); let n = conn.execute( "UPDATE mem_tree_jobs SET status = 'failed', completed_at_ms = ?1, locked_until_ms = NULL, - last_error = ?2 + last_error = ?2, + failure_reason = ?6, + failure_class = ?7 WHERE id = ?3 AND attempts = ?4 AND started_at_ms IS ?5", - params![now_ms, error, job_id, attempts, claim_started_at], + params![ + now_ms, + error, + job_id, + attempts, + claim_started_at, + failure_reason, + failure_class, + ], )?; if n == 0 { log::warn!( @@ -335,6 +372,43 @@ pub fn recover_stale_locks(config: &Config) -> Result { }) } +/// Requeue every terminally-`failed` job back to `ready` (#002 FR-011). +/// +/// Backs the manual `memory_tree_retry_failed` RPC: once the user fixes the +/// underlying cause (e.g. adds an embeddings key, switches to a faster +/// extraction model), the jobs that failed under the old config re-run without +/// re-ingesting source data. Resets `attempts` to 0 (a fresh retry budget), +/// clears the typed `failure_reason` / `failure_class` and `last_error`, and +/// makes the row immediately available. Returns the number of jobs requeued. +/// +/// NOTE: there is currently **no automatic caller**. An automatic +/// requeue-on-sync was planned, but its hook lived on the upstream-removed +/// vault sync path and has not been re-homed, so requeue is **manual-only** +/// (the `memory_tree_retry_failed` RPC) for now. 
+pub fn requeue_failed(config: &Config) -> Result { + with_connection(config, |conn| { + let now_ms = Utc::now().timestamp_millis(); + let n = conn.execute( + "UPDATE mem_tree_jobs + SET status = 'ready', + attempts = 0, + available_at_ms = ?1, + locked_until_ms = NULL, + started_at_ms = NULL, + completed_at_ms = NULL, + last_error = NULL, + failure_reason = NULL, + failure_class = NULL + WHERE status = 'failed'", + params![now_ms], + )?; + if n > 0 { + log::info!("[memory::jobs] requeued {n} failed job(s) for retry"); + } + Ok(n as u64) + }) +} + /// Release this process's in-flight job locks on a *graceful* shutdown: /// flip every `running` row back to `ready` so the work is immediately /// re-claimable on next launch instead of waiting out the lease and @@ -412,7 +486,8 @@ pub fn get_job(config: &Config, id: &str) -> Result> { .query_row( "SELECT id, kind, payload_json, dedupe_key, status, attempts, max_attempts, available_at_ms, locked_until_ms, last_error, - created_at_ms, started_at_ms, completed_at_ms + created_at_ms, started_at_ms, completed_at_ms, + failure_reason, failure_class FROM mem_tree_jobs WHERE id = ?1", params![id], row_to_job, @@ -436,6 +511,8 @@ fn row_to_job(row: &rusqlite::Row<'_>) -> rusqlite::Result { let created_at_ms: i64 = row.get(10)?; let started_at_ms: Option = row.get(11)?; let completed_at_ms: Option = row.get(12)?; + let failure_reason: Option = row.get(13)?; + let failure_class: Option = row.get(14)?; let kind = JobKind::parse(&kind_s).map_err(|e| { rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, e.into()) @@ -455,6 +532,8 @@ fn row_to_job(row: &rusqlite::Row<'_>) -> rusqlite::Result { available_at_ms, locked_until_ms, last_error, + failure_reason, + failure_class, created_at_ms, started_at_ms, completed_at_ms, @@ -515,6 +594,144 @@ mod tests { assert!(row.locked_until_ms.is_none()); } + /// T006/T007: the new `failure_reason`/`failure_class` columns must exist + /// (migration ran) and round-trip 
through `claim_next` (RETURNING) and + /// `get_job` (SELECT) as `None` until a classified failure is recorded + /// (T012 wires the write side). + #[test] + fn typed_failure_columns_roundtrip_as_none_by_default() { + let (_tmp, cfg) = test_config(); + let nj = NewJob::extract_chunk(&ExtractChunkPayload { + chunk_id: "c-typed-fail".into(), + }) + .unwrap(); + let id = enqueue(&cfg, &nj).unwrap().expect("inserted"); + + let claimed = claim_next(&cfg, DEFAULT_LOCK_DURATION_MS).unwrap().unwrap(); + assert_eq!(claimed.failure_reason, None); + assert_eq!(claimed.failure_class, None); + + let row = get_job(&cfg, &id).unwrap().unwrap(); + assert_eq!(row.failure_reason, None); + assert_eq!(row.failure_class, None); + } + + /// T012: an **unrecoverable** typed failure must terminate the job + /// immediately (status `failed`, `completed_at_ms` set) on the FIRST + /// attempt — no retry budget burned — and persist the typed + /// `failure_reason` / `failure_class` columns. A job with `max_attempts` + /// far above 1 proves the short-circuit isn't just the budget running out. 
+ #[test] + fn mark_failed_typed_unrecoverable_terminates_immediately() { + use crate::openhuman::memory_tree::health::{FailureCode, PipelineFailure}; + let (_tmp, cfg) = test_config(); + let mut nj = NewJob::extract_chunk(&ExtractChunkPayload { + chunk_id: "c-budget".into(), + }) + .unwrap(); + nj.max_attempts = Some(5); // plenty of budget left + let id = enqueue(&cfg, &nj).unwrap().expect("inserted"); + + let claimed = claim_next(&cfg, DEFAULT_LOCK_DURATION_MS).unwrap().unwrap(); + assert_eq!(claimed.attempts, 1, "first claim"); + let failure = PipelineFailure::new(FailureCode::BudgetExhausted); + mark_failed_typed(&cfg, &claimed, "Insufficient budget", Some(&failure)).unwrap(); + + let row = get_job(&cfg, &id).unwrap().unwrap(); + assert_eq!( + row.status, + JobStatus::Failed, + "unrecoverable failure must terminate on first attempt" + ); + assert!(row.completed_at_ms.is_some()); + assert_eq!(row.failure_reason.as_deref(), Some("budget_exhausted")); + assert_eq!(row.failure_class.as_deref(), Some("unrecoverable")); + assert_eq!(row.last_error.as_deref(), Some("Insufficient budget")); + } + + /// T012: a **transient** typed failure keeps the existing + /// attempts-bounded retry path — the job bounces back to `ready` with a + /// future `available_at_ms` and does NOT set the typed columns (they are + /// only persisted on a terminal classified failure). + /// T028 (FR-011): `requeue_failed` flips terminal `failed` jobs back to + /// `ready` with a fresh attempt budget and the typed failure cleared, so + /// they re-run after the cause is fixed. Non-failed jobs are untouched. + #[test] + fn requeue_failed_resets_failed_jobs_only() { + use crate::openhuman::memory_tree::health::{FailureCode, PipelineFailure}; + let (_tmp, cfg) = test_config(); + + // Job A: drive to terminal failure (max_attempts=1, unrecoverable). 
+ let mut a = NewJob::extract_chunk(&ExtractChunkPayload { + chunk_id: "a".into(), + }) + .unwrap(); + a.max_attempts = Some(1); + let id_a = enqueue(&cfg, &a).unwrap().unwrap(); + let claim_a = claim_next(&cfg, DEFAULT_LOCK_DURATION_MS).unwrap().unwrap(); + mark_failed_typed( + &cfg, + &claim_a, + "Insufficient budget", + Some(&PipelineFailure::new(FailureCode::BudgetExhausted)), + ) + .unwrap(); + assert_eq!( + get_job(&cfg, &id_a).unwrap().unwrap().status, + JobStatus::Failed + ); + + // Job B: leave ready (untouched control). + let b = NewJob::extract_chunk(&ExtractChunkPayload { + chunk_id: "b".into(), + }) + .unwrap(); + let id_b = enqueue(&cfg, &b).unwrap().unwrap(); + + let requeued = requeue_failed(&cfg).unwrap(); + assert_eq!(requeued, 1, "only the failed job should requeue"); + + let row_a = get_job(&cfg, &id_a).unwrap().unwrap(); + assert_eq!(row_a.status, JobStatus::Ready); + assert_eq!(row_a.attempts, 0, "attempt budget reset"); + assert_eq!(row_a.failure_reason, None, "typed reason cleared"); + assert_eq!(row_a.failure_class, None); + assert_eq!(row_a.last_error, None); + assert!(row_a.completed_at_ms.is_none()); + + // B was already ready — still ready, not double-counted. 
+ assert_eq!( + get_job(&cfg, &id_b).unwrap().unwrap().status, + JobStatus::Ready + ); + } + + #[test] + fn mark_failed_typed_transient_still_retries() { + use crate::openhuman::memory_tree::health::{FailureCode, PipelineFailure}; + let (_tmp, cfg) = test_config(); + let mut nj = NewJob::extract_chunk(&ExtractChunkPayload { + chunk_id: "c-transient".into(), + }) + .unwrap(); + nj.max_attempts = Some(5); + let id = enqueue(&cfg, &nj).unwrap().expect("inserted"); + + let claimed = claim_next(&cfg, DEFAULT_LOCK_DURATION_MS).unwrap().unwrap(); + let failure = PipelineFailure::new(FailureCode::Transient); + mark_failed_typed(&cfg, &claimed, "503 upstream", Some(&failure)).unwrap(); + + let row = get_job(&cfg, &id).unwrap().unwrap(); + assert_eq!( + row.status, + JobStatus::Ready, + "transient failure must retry, not terminate" + ); + assert!(row.available_at_ms > Utc::now().timestamp_millis()); + assert_eq!(row.failure_reason, None, "typed cols unset on retry"); + assert_eq!(row.failure_class, None); + } + #[test] fn enqueue_dedupes_active_jobs() { let (_tmp, cfg) = test_config(); diff --git a/src/openhuman/memory_queue/types.rs b/src/openhuman/memory_queue/types.rs index bb999f601b..f86dda0131 100644 --- a/src/openhuman/memory_queue/types.rs +++ b/src/openhuman/memory_queue/types.rs @@ -283,6 +283,14 @@ pub struct Job { pub available_at_ms: i64, pub locked_until_ms: Option, pub last_error: Option, + /// Typed failure code (e.g. "budget_exhausted") set when a job is marked + /// `failed` with a classified reason; `None` otherwise. Distinct from the + /// freeform `last_error` — this is the machine-readable cause the + /// status/doctor surface renders. + pub failure_reason: Option, + /// Failure class ("transient" | "unrecoverable") paired with + /// `failure_reason`; `None` until a classified failure is recorded. 
+ pub failure_class: Option, pub created_at_ms: i64, pub started_at_ms: Option, pub completed_at_ms: Option, diff --git a/src/openhuman/memory_queue/worker.rs b/src/openhuman/memory_queue/worker.rs index 8795e4fa5c..95315e5160 100644 --- a/src/openhuman/memory_queue/worker.rs +++ b/src/openhuman/memory_queue/worker.rs @@ -18,10 +18,11 @@ use crate::openhuman::config::Config; use crate::openhuman::memory_queue::handlers; use crate::openhuman::memory_queue::redact::scrub_for_log; use crate::openhuman::memory_queue::store::{ - claim_next, mark_deferred, mark_done, mark_failed, recover_stale_locks, release_running_locks, - DEFAULT_LOCK_DURATION_MS, + claim_next, mark_deferred, mark_done, mark_failed_typed, recover_stale_locks, + release_running_locks, DEFAULT_LOCK_DURATION_MS, }; use crate::openhuman::memory_queue::types::JobOutcome; +use crate::openhuman::memory_tree::health::PipelineFailure; /// Number of concurrent job-worker tasks. Each worker claims one job /// at a time via `claim_next` (atomic UPDATE under SQLite WAL with @@ -257,13 +258,20 @@ pub async fn run_once(config: &Config) -> Result { // the same chain after `scrub_for_log`, since anyhow chains // commonly embed upstream HTTP bodies / auth headers. let message = format!("{err:#}"); + // #002: if the error chain carries a typed `PipelineFailure` + // (attached at the embed/extract boundary), pass it through so + // `mark_failed_typed` can fail fast on unrecoverable causes + // (budget/auth/dim) instead of burning the retry budget, and + // persist the typed reason for the status/doctor surface. 
+ let typed = err.downcast_ref::(); log::warn!( - "[memory::jobs] job failed id={} kind={} err={}", + "[memory::jobs] job failed id={} kind={} reason={:?} err={}", job.id, job.kind.as_str(), + typed.map(|f| f.code.as_str()), scrub_for_log(&message) ); - mark_failed(config, &job, &message)?; + mark_failed_typed(config, &job, &message, typed)?; } } @@ -542,7 +550,11 @@ mod tests { #[tokio::test] async fn run_once_reschedules_reembed_backfill_jobs_that_defer() { - let (_tmp, cfg) = test_config(); + let (_tmp, mut cfg) = test_config(); + // Deliberate "none" opt-out → InertEmbedder (zero vectors, no network) + // so the backfill has work and Defers; this test pins the worker's + // defer-reschedule path, not embed quality. + cfg.embeddings_provider = Some("none".to_string()); let ts = Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); let chunk = Chunk { id: chunk_id(SourceKind::Chat, "slack:#eng", 0, "reembed-worker-seed"), diff --git a/src/openhuman/memory_store/chunks/connection.rs b/src/openhuman/memory_store/chunks/connection.rs index e7ff827797..28ef7cea07 100644 --- a/src/openhuman/memory_store/chunks/connection.rs +++ b/src/openhuman/memory_store/chunks/connection.rs @@ -329,6 +329,12 @@ fn apply_schema(conn: &Connection) -> Result<()> { "is_user", "INTEGER NOT NULL DEFAULT 0", )?; + // #002 memory-pipeline-hardening: typed failure metadata on jobs so the + // worker can fail-fast on unrecoverable errors and the status/doctor + // surface can show an actionable cause. Both nullable; only set when a + // job is marked `failed` with a classified reason. 
+ add_column_if_missing(conn, "mem_tree_jobs", "failure_reason", "TEXT")?; + add_column_if_missing(conn, "mem_tree_jobs", "failure_class", "TEXT")?; Ok(()) } diff --git a/src/openhuman/memory_store/chunks/store.rs b/src/openhuman/memory_store/chunks/store.rs index ea470622cd..cc3288acdd 100644 --- a/src/openhuman/memory_store/chunks/store.rs +++ b/src/openhuman/memory_store/chunks/store.rs @@ -322,7 +322,9 @@ CREATE TABLE IF NOT EXISTS mem_tree_jobs ( last_error TEXT, created_at_ms INTEGER NOT NULL, started_at_ms INTEGER, - completed_at_ms INTEGER + completed_at_ms INTEGER, + failure_reason TEXT, + failure_class TEXT ); CREATE INDEX IF NOT EXISTS idx_mem_tree_jobs_ready @@ -687,6 +689,34 @@ pub fn count_chunks(config: &Config) -> Result { }) } +/// #002 (FR-010 / US5): extraction coverage — the fraction of chunks that have +/// at least one indexed entity in `mem_tree_entity_index`, in `[0.0, 1.0]`. +/// +/// Turns "wiki built / not built" into a quality signal: a value near 0 with a +/// non-zero chunk count means extraction is producing nothing (the model is +/// timing out / failing), even though chunks exist — the "empty-but-built +/// wiki" symptom. Joins the entity index against `mem_tree_chunks.id` so the +/// numerator is node-kind-agnostic (we only count entity rows whose `node_id` +/// is an actual chunk). Returns `0.0` when there are no chunks. +pub fn extraction_coverage(config: &Config) -> Result { + with_connection(config, |conn| { + let total: i64 = + conn.query_row("SELECT COUNT(*) FROM mem_tree_chunks", [], |r| r.get(0))?; + if total <= 0 { + return Ok(0.0); + } + let covered: i64 = conn.query_row( + "SELECT COUNT(*) FROM mem_tree_chunks c + WHERE EXISTS ( + SELECT 1 FROM mem_tree_entity_index e WHERE e.node_id = c.id + )", + [], + |r| r.get(0), + )?; + Ok((covered.max(0) as f32) / (total as f32)) + }) +} + /// Set the lifecycle status column for `chunk_id`. See `CHUNK_STATUS_*`. 
pub fn set_chunk_lifecycle_status(config: &Config, chunk_id: &str, status: &str) -> Result<()> { with_connection(config, |conn| { diff --git a/src/openhuman/memory_store/chunks/store_tests.rs b/src/openhuman/memory_store/chunks/store_tests.rs index d09cde8307..0d73b0a6e0 100644 --- a/src/openhuman/memory_store/chunks/store_tests.rs +++ b/src/openhuman/memory_store/chunks/store_tests.rs @@ -1647,3 +1647,47 @@ fn global_topic_purge_removes_only_global_and_topic() { "source summary folder must survive the purge" ); } + +// ── extraction_coverage (#002 FR-010 / US5) ────────────────────────────── + +#[test] +fn extraction_coverage_empty_store_is_zero() { + let (_tmp, cfg) = test_config(); + assert_eq!(extraction_coverage(&cfg).unwrap(), 0.0); +} + +#[test] +fn extraction_coverage_reflects_indexed_fraction() { + let (_tmp, cfg) = test_config(); + // Two chunks; index an entity for only the first → coverage 0.5. + let c1 = sample_chunk("slack:#eng", 0, 1_700_000_000_000); + let c2 = sample_chunk("slack:#eng", 1, 1_700_000_001_000); + upsert_chunks(&cfg, &[c1.clone(), c2.clone()]).unwrap(); + + with_connection(&cfg, |conn| { + conn.execute( + "INSERT INTO mem_tree_entity_index + (entity_id, node_id, node_kind, entity_kind, surface, score, timestamp_ms) + VALUES (?1, ?2, 'leaf', 'person', 'Alice', 0.9, 1)", + params!["person:Alice", c1.id], + )?; + Ok(()) + }) + .unwrap(); + + let cov = extraction_coverage(&cfg).unwrap(); + assert!((cov - 0.5).abs() < 1e-6, "expected 0.5, got {cov}"); + + // Index the second chunk too → full coverage. 
+ with_connection(&cfg, |conn| { + conn.execute( + "INSERT INTO mem_tree_entity_index + (entity_id, node_id, node_kind, entity_kind, surface, score, timestamp_ms) + VALUES (?1, ?2, 'leaf', 'person', 'Bob', 0.9, 2)", + params!["person:Bob", c2.id], + )?; + Ok(()) + }) + .unwrap(); + assert!((extraction_coverage(&cfg).unwrap() - 1.0).abs() < 1e-6); +} diff --git a/src/openhuman/memory_tree/health/doctor.rs b/src/openhuman/memory_tree/health/doctor.rs new file mode 100644 index 0000000000..41aa164985 --- /dev/null +++ b/src/openhuman/memory_tree/health/doctor.rs @@ -0,0 +1,401 @@ +//! One-shot memory-pipeline diagnostic (#002 FR-009). +//! +//! `run_doctor` walks each stage of the chunk→wiki + summary-tree pipeline and +//! returns a [`DoctorReport`]: per-stage health, the single first blocking +//! cause (so the agent / CLI gets one actionable answer instead of a wall of +//! counters), and the current counters. It is exposed as an agent tool and a +//! CLI/RPC method — there is no UI surface this round (the status panel +//! already renders `first_blocking_cause`). +//! +//! Design: this is a **config + persisted-state** diagnosis — it reads the +//! routing config, the scheduler-gate mode, the process-global degraded flags +//! (set by the embed/extract stages), the job-queue counters, and the chunk +//! count. It intentionally does **not** fire a live embed/extract probe in this +//! cut: a network call would make the doctor slow, flaky, and order-dependent, +//! and the degraded flags already capture "did the last real run fail and how". +//! A time-boxed live probe is a clean follow-up if we want pre-run validation. + +use serde::{Deserialize, Serialize}; + +use super::{current_degraded_state, DegradedState, FailureCode, PipelineFailure}; +use crate::openhuman::config::{Config, SchedulerGateMode}; + +/// Health of one named pipeline stage. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct StageHealth { + /// Stable stage id: `routing`, `scheduler_gate`, `embeddings`, + /// `extraction`, `queue`, `summary_tree`. + pub stage: String, + /// True when this stage is healthy / not blocking. + pub ok: bool, + /// Typed failure when `ok == false`; `None` when healthy. Carries the + /// i18n remediation key the surfaces render. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub failure: Option, + /// Short non-localized human note for logs / CLI (never a secret). + pub note: String, +} + +impl StageHealth { + fn ok(stage: &str, note: impl Into) -> Self { + Self { + stage: stage.to_string(), + ok: true, + failure: None, + note: note.into(), + } + } + + fn bad(stage: &str, failure: PipelineFailure, note: impl Into) -> Self { + Self { + stage: stage.to_string(), + ok: false, + failure: Some(failure), + note: note.into(), + } + } +} + +/// Current pipeline counters, mirrored from the status surface so the doctor +/// is a one-call snapshot. +// No `Eq`: `extraction_coverage` is `Option` — `f32` never implements `Eq`. +#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] +pub struct DoctorCounters { + pub total_chunks: u64, + pub jobs_ready: u64, + pub jobs_running: u64, + pub jobs_failed: u64, + /// #002 (FR-010 / US5): fraction of chunks with ≥1 indexed entity, in + /// `[0.0, 1.0]`. Near 0 with `total_chunks > 0` means extraction is + /// producing no structure. `None` when the metric could not be measured + /// (DB read error) — deliberately distinct from a genuine `0.0` so a + /// broken measurement is never misreported as a structure failure. + #[serde(default)] + pub extraction_coverage: Option, +} + +/// The full diagnostic. `first_blocking_cause` is the failure of the first +/// non-ok stage in pipeline order (`stages` is already ordered), so a caller +/// can act on one thing; `healthy` is the convenience roll-up. 
+// No `Eq`: transitively contains `DoctorCounters` (Option — f32: !Eq). +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct DoctorReport { + pub healthy: bool, + pub stages: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub first_blocking_cause: Option, + pub degraded: DegradedState, + pub counters: DoctorCounters, +} + +/// Run the diagnostic against `config` + persisted/queue/degraded state. +/// +/// Best-effort: counter reads that error degrade to 0 (the doctor is a +/// convenience, not an audit) and never fail the whole call. Stage order is +/// the pipeline order so the first non-ok stage is the first blocking cause. +pub fn run_doctor(config: &Config) -> DoctorReport { + use crate::openhuman::memory_queue::store as queue; + use crate::openhuman::memory_queue::types::JobStatus; + use crate::openhuman::memory_store::chunks::store as chunks; + + let degraded = current_degraded_state(); + let counters = DoctorCounters { + total_chunks: chunks::count_chunks(config).unwrap_or(0), + jobs_ready: queue::count_by_status(config, JobStatus::Ready).unwrap_or(0), + jobs_running: queue::count_by_status(config, JobStatus::Running).unwrap_or(0), + jobs_failed: queue::count_by_status(config, JobStatus::Failed).unwrap_or(0), + extraction_coverage: chunks::extraction_coverage(config).ok(), + }; + + let mut stages = Vec::new(); + + // 1. Routing/config sanity — is *any* embeddings provider configured? + // (`build_write_embedder` skips embedding when none is, so this is the + // most common "empty wiki" root cause.) + let embeddings_provider = config + .memory_tree + .embedding_endpoint + .as_deref() + .filter(|s| !s.trim().is_empty()) + .map(|_| "ollama-override".to_string()) + .or_else(|| config.embeddings_provider.clone()) + .filter(|s| !s.trim().is_empty()); + stages.push(match embeddings_provider.as_deref() { + // Explicit `none` opt-out: semantic recall is off by the user's choice, + // not a fault. 
Reported `ok` (consistent with a `scheduler_gate=off` + // pause and the write-path opt-out treatment) but with an honest note, + // so the prior "provider configured: none" can't read as a working + // embeddings provider. (CodeRabbit on doctor.rs) + Some("none") => StageHealth::ok( + "embeddings", + "embeddings disabled by you (provider = none) — semantic recall is intentionally off", + ), + Some(p) => StageHealth::ok("embeddings", format!("provider configured: {p}")), + None => StageHealth::bad( + "embeddings", + PipelineFailure::new(FailureCode::EmbeddingsUnconfigured), + "no embeddings provider configured — semantic recall is off", + ), + }); + + // 2. Scheduler gate — `off` means the user paused background work. Report + // it as a *user choice*, not a fault (ok == true), but note it so a + // confused "nothing is happening" reads clearly. + let gate_off = config.scheduler_gate.mode == SchedulerGateMode::Off; + stages.push(StageHealth::ok( + "scheduler_gate", + if gate_off { + "paused by you (scheduler gate = off) — background sync is intentionally stopped" + } else { + "auto — background sync runs" + }, + )); + + // 3. Queue health — failed jobs are a hard signal. The typed reason (when + // present on the most-recent failed row) is surfaced by the status RPC; + // here we just flag that failures exist and how many. + if counters.jobs_failed > 0 { + stages.push(StageHealth::bad( + "queue", + // The most-recent typed reason is surfaced by pipeline_status; + // doctor reports the count + a transient-by-default placeholder so + // the stage is non-ok and actionable. + PipelineFailure::new(FailureCode::Transient), + format!("{} failed job(s) in mem_tree_jobs", counters.jobs_failed), + )); + } else { + stages.push(StageHealth::ok("queue", "no failed jobs")); + } + + // 4. Degraded signals from the last real run. 
+ if degraded.semantic_recall { + let cause = degraded + .cause + .clone() + .unwrap_or_else(|| PipelineFailure::new(FailureCode::EmbeddingsUnconfigured)); + stages.push(StageHealth::bad( + "extraction", + // semantic_recall degradation is an embeddings problem, but reuse + // the recorded cause which names the real reason. + cause, + "semantic recall degraded — embeddings were skipped on the last run", + )); + } else if degraded.structure { + let cause = degraded + .cause + .clone() + .unwrap_or_else(|| PipelineFailure::new(FailureCode::ExtractionTimeout)); + stages.push(StageHealth::bad( + "extraction", + cause, + "wiki structure degraded — extraction produced no entities on the last run", + )); + } else { + stages.push(StageHealth::ok("extraction", "no degradation recorded")); + } + + // 5. Summary-tree precondition. Reuse the runtime's own capability check + // (`tree_runtime::ops::summarizer_available`) so the doctor matches what + // "Build Summary Trees" will actually do — since #002 FR-007 it runs on + // the configured cloud provider when local AI is off, so local-AI-off is + // NOT a fault by itself. Only `bad` when no provider resolves at all. + let (summary_ok, summary_note) = + crate::openhuman::memory_tree::tree_runtime::ops::summarizer_available(config); + stages.push(if summary_ok { + StageHealth::ok("summary_tree", summary_note) + } else { + StageHealth::bad( + "summary_tree", + PipelineFailure::new(FailureCode::SummarizerUnavailable), + summary_note, + ) + }); + + let first_blocking_cause = stages + .iter() + .find(|s| !s.ok) + .and_then(|s| s.failure.clone()); + let healthy = first_blocking_cause.is_none(); + + DoctorReport { + healthy, + stages, + first_blocking_cause, + degraded, + counters, + } +} + +/// Async wrapper around [`run_doctor`] for async call sites (the RPC + agent +/// tool). 
`run_doctor` does synchronous SQLite reads (chunk/job counts + +/// extraction coverage); a contended DB could pin a Tokio worker for the +/// busy-timeout window, so offload the whole diagnostic to a blocking thread. +pub async fn async_run_doctor(config: &Config) -> DoctorReport { + let cfg = config.clone(); + match tokio::task::spawn_blocking(move || run_doctor(&cfg)).await { + Ok(report) => report, + Err(join_err) => { + // The blocking task panicked — surface a degraded-but-shaped report + // rather than propagating, since the doctor is a best-effort + // diagnostic and callers expect a report, not an error. + log::warn!("[memory_tree::health::doctor] run_doctor task failed: {join_err}"); + DoctorReport { + healthy: false, + stages: Vec::new(), + first_blocking_cause: Some(PipelineFailure::new(FailureCode::Transient)), + degraded: current_degraded_state(), + counters: DoctorCounters::default(), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn test_config() -> (TempDir, Config) { + let tmp = TempDir::new().unwrap(); + let mut cfg = Config::default(); + cfg.workspace_dir = tmp.path().to_path_buf(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + (tmp, cfg) + } + + #[test] + fn misconfigured_workspace_reports_embeddings_as_first_blocking_cause() { + let _g = super::super::test_guard(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = None; // no provider at all + cfg.local_ai.runtime_enabled = false; + + let report = run_doctor(&cfg); + assert!(!report.healthy); + // Embeddings is stage 1, so it is the first blocking cause. + let cause = report.first_blocking_cause.expect("should have a cause"); + assert_eq!(cause.code, FailureCode::EmbeddingsUnconfigured); + // The embeddings stage is non-ok with the same code. 
+ let embed = report + .stages + .iter() + .find(|s| s.stage == "embeddings") + .unwrap(); + assert!(!embed.ok); + } + + #[test] + fn healthy_when_embeddings_and_local_ai_configured() { + let _g = super::super::test_guard(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = Some("none".into()); // a configured choice + cfg.local_ai.runtime_enabled = true; + + let report = run_doctor(&cfg); + assert!( + report.healthy, + "expected healthy, got {:?}", + report.first_blocking_cause + ); + assert!(report.first_blocking_cause.is_none()); + // Every stage ok. + assert!( + report.stages.iter().all(|s| s.ok), + "stages: {:?}", + report.stages + ); + } + + #[test] + fn embeddings_none_opt_out_is_ok_but_note_is_honest() { + // `embeddings_provider = "none"` is a deliberate opt-out: the stage stays + // ok (a configured choice, like a paused scheduler gate) but the note must + // not read as a working provider ("provider configured: none"). (CodeRabbit) + let _g = super::super::test_guard(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = Some("none".into()); + cfg.local_ai.runtime_enabled = true; + + let report = run_doctor(&cfg); + let embed = report + .stages + .iter() + .find(|s| s.stage == "embeddings") + .unwrap(); + assert!(embed.ok, "opt-out is a choice, not a fault"); + assert!( + embed.note.contains("disabled") && embed.note.contains("intentionally off"), + "note must name the intentional opt-out, got: {}", + embed.note + ); + assert!( + !embed.note.contains("provider configured"), + "must not read as a working provider, got: {}", + embed.note + ); + } + + #[test] + fn scheduler_gate_off_is_a_choice_not_a_fault() { + use crate::openhuman::config::SchedulerGateMode; + let _g = super::super::test_guard(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = Some("ollama:bge-m3".into()); + cfg.local_ai.runtime_enabled = true; + cfg.scheduler_gate.mode = SchedulerGateMode::Off; + + let report = run_doctor(&cfg); + // 
Paused is reported but does NOT make the pipeline unhealthy. + assert!(report.healthy); + let gate = report + .stages + .iter() + .find(|s| s.stage == "scheduler_gate") + .unwrap(); + assert!(gate.ok); + assert!(gate.note.contains("paused")); + } + + /// #002 FR-007 / Gray review: the doctor's `summary_tree` stage must mirror + /// `summarizer_available` exactly. With local AI off and no cloud opt-in + /// (the default), the stage reports unavailable — which is correct, since + /// cloud summarization requires explicit consent. The stage must NOT fire + /// a generic "local AI required" hard-failure; it names the opt-in gap. + #[test] + fn local_ai_off_reports_no_provider_without_cloud_opt_in() { + let _g = super::super::test_guard(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = Some("ollama:bge-m3".into()); // embeddings ok + cfg.local_ai.runtime_enabled = false; // cloud opt-in not set (default false) + + let report = run_doctor(&cfg); + let tree = report + .stages + .iter() + .find(|s| s.stage == "summary_tree") + .unwrap(); + // summary_tree must mirror summarizer_available precisely. + assert_eq!( + tree.ok, + crate::openhuman::memory_tree::tree_runtime::ops::summarizer_available(&cfg).0, + "summary_tree health must mirror the runtime capability check" + ); + // Without opt-in, the note names the "no summarization provider" case. 
+ assert!( + tree.note.contains("no summarization provider"), + "unexpected summary_tree note: {}", + tree.note + ); + } + + #[test] + fn report_serde_roundtrips() { + let _g = super::super::test_guard(); + let (_tmp, cfg) = test_config(); + let report = run_doctor(&cfg); + let json = serde_json::to_string(&report).unwrap(); + let back: DoctorReport = serde_json::from_str(&json).unwrap(); + assert_eq!(report, back); + } +} diff --git a/src/openhuman/memory_tree/health/mod.rs b/src/openhuman/memory_tree/health/mod.rs new file mode 100644 index 0000000000..8742897ad3 --- /dev/null +++ b/src/openhuman/memory_tree/health/mod.rs @@ -0,0 +1,684 @@ +//! Typed failure + degradation model for the memory pipeline. +//! +//! The chunk→wiki pipeline and the time-tree summarizer fail in several +//! distinct ways (budget exhausted, missing/invalid key, missing local +//! model, dimension mismatch, extraction timeout, transient network). +//! Historically these all collapsed into an opaque error string and were +//! retried identically — so a hard "Insufficient budget" 4xx burned the +//! retry budget and the user saw a generic `error: N failed jobs`. +//! +//! This module is the single source of truth that fixes that: +//! +//! - [`FailureCode`] enumerates every distinguishable cause. +//! - Each code maps to a [`FailureClass`] (`Transient` ⇒ retry with +//! backoff, `Unrecoverable` ⇒ fail fast) and a stable i18n +//! `remediation_key` so the status surface / doctor / job row all show +//! consistent, actionable text. Embeddings remediation leads with the +//! local-Ollama path (the steered primary fix), with BYO key secondary. +//! - [`PipelineFailure`] is a `std::error::Error`, so it can be wrapped in +//! `anyhow` and propagated up through the job processor, then downcast in +//! the queue worker to decide retry-vs-fail. +//! - [`DegradedState`] captures "the pipeline ran but recall/structure is +//! reduced" — surfaced so degraded output is never presented as success. 
+ +use serde::{Deserialize, Serialize}; +use std::fmt; + +pub mod doctor; +pub use doctor::{async_run_doctor, run_doctor, DoctorCounters, DoctorReport, StageHealth}; + +/// Whether a failure should be retried (`Transient`) or fail fast +/// (`Unrecoverable`). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FailureClass { + /// Retry with backoff up to `max_attempts` (network 5xx, timeouts, + /// truncated streams). + Transient, + /// Stop immediately — retrying the same input cannot succeed (budget + /// exhausted, bad/missing key, missing local model, dim mismatch). + Unrecoverable, +} + +impl FailureClass { + pub fn as_str(self) -> &'static str { + match self { + Self::Transient => "transient", + Self::Unrecoverable => "unrecoverable", + } + } +} + +/// A distinguishable pipeline failure cause. Each variant carries a fixed +/// [`FailureClass`] and i18n remediation key. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FailureCode { + /// Managed embeddings route returned an out-of-budget error (4xx). + BudgetExhausted, + /// No auth/session available for the embeddings provider. + AuthMissing, + /// Auth present but rejected (expired/invalid key or JWT). + AuthInvalid, + /// No embeddings provider is configured at all. + EmbeddingsUnconfigured, + /// Provider returned vectors of an unexpected dimensionality. + EmbeddingDimMismatch, + /// A required local model (Ollama) is not available. + LocalModelUnavailable, + /// The extraction model timed out / exhausted retries. + ExtractionTimeout, + /// No summarization provider could be resolved for "Build Summary Trees" + /// — neither local AI nor a configured cloud chat provider. 
Distinct from + /// [`LocalModelUnavailable`](Self::LocalModelUnavailable), which implies the + /// local path was selected; this covers the cloud-only setup whose provider + /// failed to resolve, so the remediation names both paths. + SummarizerUnavailable, + /// Catch-all transient failure (network 5xx, timeout, truncated JSON). + Transient, +} + +impl FailureCode { + /// Stable wire string. + pub fn as_str(self) -> &'static str { + match self { + Self::BudgetExhausted => "budget_exhausted", + Self::AuthMissing => "auth_missing", + Self::AuthInvalid => "auth_invalid", + Self::EmbeddingsUnconfigured => "embeddings_unconfigured", + Self::EmbeddingDimMismatch => "embedding_dim_mismatch", + Self::LocalModelUnavailable => "local_model_unavailable", + Self::ExtractionTimeout => "extraction_timeout", + Self::SummarizerUnavailable => "summarizer_unavailable", + Self::Transient => "transient", + } + } + + pub fn from_str(s: &str) -> Option { + Some(match s { + "budget_exhausted" => Self::BudgetExhausted, + "auth_missing" => Self::AuthMissing, + "auth_invalid" => Self::AuthInvalid, + "embeddings_unconfigured" => Self::EmbeddingsUnconfigured, + "embedding_dim_mismatch" => Self::EmbeddingDimMismatch, + "local_model_unavailable" => Self::LocalModelUnavailable, + "extraction_timeout" => Self::ExtractionTimeout, + "summarizer_unavailable" => Self::SummarizerUnavailable, + "transient" => Self::Transient, + _ => return None, + }) + } + + /// Retry policy for this cause. + pub fn class(self) -> FailureClass { + match self { + Self::Transient | Self::ExtractionTimeout => FailureClass::Transient, + _ => FailureClass::Unrecoverable, + } + } + + /// i18n key for the user-facing remediation. Embeddings causes lead + /// with the local-Ollama path (the steered primary fix per spec FR-015). 
+ pub fn remediation_key(self) -> &'static str { + match self { + Self::BudgetExhausted => "memory.health.remediation.budget_exhausted", + Self::AuthMissing => "memory.health.remediation.auth_missing", + Self::AuthInvalid => "memory.health.remediation.auth_invalid", + Self::EmbeddingsUnconfigured => "memory.health.remediation.embeddings_unconfigured", + Self::EmbeddingDimMismatch => "memory.health.remediation.embedding_dim_mismatch", + Self::LocalModelUnavailable => "memory.health.remediation.local_model_unavailable", + Self::ExtractionTimeout => "memory.health.remediation.extraction_timeout", + Self::SummarizerUnavailable => "memory.health.remediation.summarizer_unavailable", + Self::Transient => "memory.health.remediation.transient", + } + } +} + +/// A typed pipeline failure: a [`FailureCode`] plus the derived class + +/// remediation key (carried on the wire so the frontend stays +/// presentational) and an optional human-readable detail for logs/diagnosis. +/// +/// Implements [`std::error::Error`] so it can be `anyhow`-wrapped at the +/// embed/extract/summarize boundary, propagated through the job processor, +/// and downcast in the queue worker to drive retry-vs-fail. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct PipelineFailure { + pub code: FailureCode, + pub class: FailureClass, + /// i18n key — the frontend resolves this to localized remediation text. + pub remediation_key: String, + /// Optional non-localized detail for logs/diagnosis (never a secret). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub detail: Option, +} + +impl PipelineFailure { + /// Build a failure from a code, deriving class + remediation key. + pub fn new(code: FailureCode) -> Self { + Self { + code, + class: code.class(), + remediation_key: code.remediation_key().to_string(), + detail: None, + } + } + + /// Attach a non-localized detail string (truncated by callers; never + /// log secrets). 
+    pub fn with_detail(mut self, detail: impl Into<String>) -> Self {
+        self.detail = Some(detail.into());
+        self
+    }
+
+    /// True when this failure should fail fast (no retry budget).
+    pub fn is_unrecoverable(&self) -> bool {
+        self.class == FailureClass::Unrecoverable
+    }
+}
+
+/// Classify an embedding-stage error into a typed [`PipelineFailure`].
+///
+/// The embed path bottoms out in `embeddings::openai::OpenAiEmbedding::embed`,
+/// which on a non-2xx response bails with the message
+/// `"Embedding API error (<status>): <body>"` (status is reqwest's
+/// `StatusCode` Display, e.g. `402 Payment Required`). Dimension mismatches
+/// surface from the memory-tree `CloudEmbedder`/trait validator as
+/// `"... returned N dims, expected M"` or `"... dims, expected ..."`. We
+/// parse those shapes to decide retry-vs-fail:
+///
+/// - `401` / `403` → `auth_invalid` (a bearer was sent but rejected).
+/// - `402` / `429` / a message mentioning budget, quota or "payment required"
+///   → `budget_exhausted` (the managed Voyage route is out of budget; the
+///   user must bring their own key or top up — retrying won't help).
+/// - dimension-mismatch text → `embedding_dim_mismatch`.
+/// - everything else (5xx, timeouts, transport, unparseable) → `transient`,
+///   so the worker's existing retry-with-backoff still applies.
+///
+/// Operates on the flattened `anyhow` chain (`{err:#}`) so it still matches
+/// when the embed error has been `.context()`-wrapped on the way up.
+pub fn classify_embed_error(err: &anyhow::Error) -> PipelineFailure {
+    let msg = format!("{err:#}");
+    classify_embed_error_str(&msg)
+}
+
+/// String-level core of [`classify_embed_error`], split out so unit tests can
+/// exercise the mapping without constructing reqwest errors.
+///
+/// Checks run in priority order: dimension-mismatch wording, then
+/// budget/quota wording, then the HTTP status. Every result carries a capped
+/// copy of `msg` as its detail.
+pub fn classify_embed_error_str(msg: &str) -> PipelineFailure {
+    let lower = msg.to_ascii_lowercase();
+    // All outcomes attach the same truncated detail; build it in one place.
+    let fail = |code: FailureCode| PipelineFailure::new(code).with_detail(truncate_detail(msg));
+
+    // Dimension mismatch — the trait validator / CloudEmbedder rejects a
+    // vector whose length isn't EMBEDDING_DIM. Check before status parsing:
+    // it's a 2xx-but-wrong-shape case with no HTTP status to match.
+    if lower.contains("dims, expected") || lower.contains("dimensions, expected") {
+        return fail(FailureCode::EmbeddingDimMismatch);
+    }
+
+    // Budget/quota wording wins regardless of the numeric status — the
+    // managed backend may surface budget exhaustion as 4xx with an explicit
+    // body, and we always want the BYO-key remediation here. ("insufficient
+    // budget" needs no separate test: the plain "budget" match covers it.)
+    if ["budget", "quota", "payment required"]
+        .iter()
+        .any(|needle| lower.contains(*needle))
+    {
+        return fail(FailureCode::BudgetExhausted);
+    }
+
+    // Parse the HTTP status out of the `Embedding API error (<status>): ...`
+    // shape. reqwest renders e.g. `402 Payment Required`.
+    match parse_http_status(msg) {
+        Some(401 | 403) => fail(FailureCode::AuthInvalid),
+        Some(402 | 429) => fail(FailureCode::BudgetExhausted),
+        // Only the codes above are known to be unrecoverable. Any other 4xx
+        // (e.g. 408/425), every 5xx, and a message with no recognizable status
+        // (transport error, timeout, connection reset) stay transient so the
+        // worker's retry/backoff applies and we never wedge on a retryable
+        // response.
+        _ => fail(FailureCode::Transient),
+    }
+}
+
+/// Extract the HTTP status code from an `Embedding API error (<status>)`
+/// message.
+///
+/// Returns the number at the start of the first parenthesised group that
+/// begins (after optional whitespace) with exactly three ASCII digits. Groups
+/// that do not look like a status are skipped rather than ending the search,
+/// so parentheses introduced by a `.context()` wrapper ahead of the provider
+/// message do not hide the real status. Returns `None` when no group matches.
+fn parse_http_status(msg: &str) -> Option<u16> {
+    msg.match_indices('(').find_map(|(open, _)| {
+        let rest = msg[open + 1..].trim_start();
+        let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
+        if digits == 3 {
+            // The first three bytes are ASCII digits, so slicing at 3 is on a
+            // char boundary.
+            rest[..3].parse().ok()
+        } else {
+            None
+        }
+    })
+}
+
+/// Cap a detail string at 200 characters (plus a trailing `…` when cut) so we
+/// never balloon logs / wire payloads with a full provider response body.
+/// Strings of 200 characters or fewer are returned unchanged.
+fn truncate_detail(s: &str) -> String {
+    const MAX: usize = 200;
+    // `nth(MAX)` yields the byte offset of the first character past the cap;
+    // `None` means the whole string already fits. Cutting at that offset keeps
+    // exactly MAX characters and never splits a multi-byte character.
+    match s.char_indices().nth(MAX) {
+        Some((cut, _)) => format!("{}…", &s[..cut]),
+        None => s.to_string(),
+    }
+}
+
+impl fmt::Display for PipelineFailure {
+    /// Renders `code (class)`, followed by `: detail` when a detail is set.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} ({})", self.code.as_str(), self.class.as_str())?;
+        if let Some(detail) = &self.detail {
+            write!(f, ": {detail}")?;
+        }
+        Ok(())
+    }
+}
+
+impl std::error::Error for PipelineFailure {}
+
+/// "The pipeline ran, but output quality is reduced." Surfaced so degraded
+/// results are never presented as success.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct DegradedState {
+    /// True when embeddings were skipped (no usable provider) so semantic
+    /// recall falls back to recency-only.
+    pub semantic_recall: bool,
+    /// True when extraction yielded empty across the board so the wiki has
+    /// no entity/topic structure.
+    pub structure: bool,
+    /// The cause of the most significant degradation, when known.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub cause: Option<PipelineFailure>,
+}
+
+impl DegradedState {
+    /// True when any degradation is present. 
+ pub fn is_degraded(&self) -> bool { + self.semantic_recall || self.structure + } +} + +// ── Process-visible degradation flags ──────────────────────────────────── +// +// The embed/extract stages run deep inside the job worker, far from the +// `pipeline_status` RPC. Rather than thread a `DegradedState` return up +// through every call site, the stages set these process-global atomics when +// they detect a degraded condition (no usable embedder → semantic recall +// disabled; extraction empty across the board → no structure). The status / +// doctor surface reads them via [`current_degraded_state`]. They reflect the +// most recent run, are cheap, and never block — a coarse "is recall/structure +// currently degraded?" signal, intentionally not per-namespace. + +use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; + +static SEMANTIC_RECALL_DEGRADED: AtomicBool = AtomicBool::new(false); +static STRUCTURE_DEGRADED: AtomicBool = AtomicBool::new(false); +/// Per-flag degradation cause as a `FailureCode` discriminant (0 = none). +/// Tracked separately per flag so clearing one degradation can't leave the +/// other reporting a stale cause (e.g. mark recall, mark structure, clear +/// structure → recall must still report its OWN cause, not structure's). 
+static SEMANTIC_RECALL_CAUSE: AtomicU8 = AtomicU8::new(0); +static STRUCTURE_CAUSE: AtomicU8 = AtomicU8::new(0); + +fn code_to_u8(code: FailureCode) -> u8 { + match code { + FailureCode::BudgetExhausted => 1, + FailureCode::AuthMissing => 2, + FailureCode::AuthInvalid => 3, + FailureCode::EmbeddingsUnconfigured => 4, + FailureCode::EmbeddingDimMismatch => 5, + FailureCode::LocalModelUnavailable => 6, + FailureCode::ExtractionTimeout => 7, + FailureCode::SummarizerUnavailable => 8, + FailureCode::Transient => 9, + } +} + +fn u8_to_code(v: u8) -> Option { + Some(match v { + 1 => FailureCode::BudgetExhausted, + 2 => FailureCode::AuthMissing, + 3 => FailureCode::AuthInvalid, + 4 => FailureCode::EmbeddingsUnconfigured, + 5 => FailureCode::EmbeddingDimMismatch, + 6 => FailureCode::LocalModelUnavailable, + 7 => FailureCode::ExtractionTimeout, + 8 => FailureCode::SummarizerUnavailable, + 9 => FailureCode::Transient, + _ => return None, + }) +} + +/// Record that semantic recall is degraded (embeddings were skipped because no +/// usable provider is available). `cause` names why so the status surface can +/// lead the user to the fix. Idempotent / cheap; safe to call per embed-stage. +pub fn mark_semantic_recall_degraded(cause: FailureCode) { + SEMANTIC_RECALL_DEGRADED.store(true, Ordering::Relaxed); + SEMANTIC_RECALL_CAUSE.store(code_to_u8(cause), Ordering::Relaxed); +} + +/// Clear the semantic-recall degraded flag — call when an embed succeeds, so +/// the surface recovers once the user fixes the provider. Clears only this +/// flag's cause; a still-active structure degradation keeps its own. +pub fn clear_semantic_recall_degraded() { + SEMANTIC_RECALL_DEGRADED.store(false, Ordering::Relaxed); + SEMANTIC_RECALL_CAUSE.store(0, Ordering::Relaxed); +} + +/// Record that wiki structure is degraded (extraction yielded nothing across +/// the board). `cause` is typically [`FailureCode::ExtractionTimeout`]. 
+pub fn mark_structure_degraded(cause: FailureCode) { + STRUCTURE_DEGRADED.store(true, Ordering::Relaxed); + STRUCTURE_CAUSE.store(code_to_u8(cause), Ordering::Relaxed); +} + +/// Clear the structure degraded flag — call when extraction yields entities. +/// Clears only this flag's cause. +pub fn clear_structure_degraded() { + STRUCTURE_DEGRADED.store(false, Ordering::Relaxed); + STRUCTURE_CAUSE.store(0, Ordering::Relaxed); +} + +/// Test-only serialization + reset for the process-global degraded flags. +/// +/// The flags are a single process-wide signal, so tests across *different* +/// modules (factory, extract::llm, tree::rpc) that set or read them race under +/// cargo's parallel runner. Any such test must `let _g = test_guard();` at the +/// top: it takes a shared mutex (serialising all flag-touching tests) and +/// resets both flags to a clean baseline so the test starts deterministic. +#[cfg(test)] +pub fn test_guard() -> std::sync::MutexGuard<'static, ()> { + static LOCK: std::sync::OnceLock> = std::sync::OnceLock::new(); + let g = LOCK + .get_or_init(|| std::sync::Mutex::new(())) + .lock() + .unwrap_or_else(|p| p.into_inner()); + SEMANTIC_RECALL_DEGRADED.store(false, Ordering::Relaxed); + STRUCTURE_DEGRADED.store(false, Ordering::Relaxed); + SEMANTIC_RECALL_CAUSE.store(0, Ordering::Relaxed); + STRUCTURE_CAUSE.store(0, Ordering::Relaxed); + g +} + +/// Snapshot the current process-global [`DegradedState`] for the status / +/// doctor surface. The `cause` is populated from the last recorded +/// [`FailureCode`] when either flag is set. +pub fn current_degraded_state() -> DegradedState { + let semantic_recall = SEMANTIC_RECALL_DEGRADED.load(Ordering::Relaxed); + let structure = STRUCTURE_DEGRADED.load(Ordering::Relaxed); + // Each flag carries its own cause; pick the most actionable one to surface. 
+ // Structure degradation (extraction failing → empty wiki) is reported first + // because it's the more severe "built but useless" symptom; otherwise the + // recall cause. Either way the cause reflects a CURRENTLY-active flag. + let cause = if structure { + u8_to_code(STRUCTURE_CAUSE.load(Ordering::Relaxed)).map(PipelineFailure::new) + } else if semantic_recall { + u8_to_code(SEMANTIC_RECALL_CAUSE.load(Ordering::Relaxed)).map(PipelineFailure::new) + } else { + None + }; + DegradedState { + semantic_recall, + structure, + cause, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const ALL_CODES: [FailureCode; 9] = [ + FailureCode::BudgetExhausted, + FailureCode::AuthMissing, + FailureCode::AuthInvalid, + FailureCode::EmbeddingsUnconfigured, + FailureCode::EmbeddingDimMismatch, + FailureCode::LocalModelUnavailable, + FailureCode::ExtractionTimeout, + FailureCode::SummarizerUnavailable, + FailureCode::Transient, + ]; + + #[test] + fn every_code_has_class_and_nonempty_remediation_key() { + for code in ALL_CODES { + let key = code.remediation_key(); + assert!( + !key.is_empty(), + "{} has empty remediation key", + code.as_str() + ); + assert!( + key.starts_with("memory.health.remediation."), + "{} remediation key has unexpected prefix: {key}", + code.as_str() + ); + // class() must be total (no panic); Transient + ExtractionTimeout + // are retryable, everything else is unrecoverable. 
+ let class = code.class(); + match code { + FailureCode::Transient | FailureCode::ExtractionTimeout => { + assert_eq!( + class, + FailureClass::Transient, + "{} should be transient", + code.as_str() + ); + } + _ => { + assert_eq!( + class, + FailureClass::Unrecoverable, + "{} should be unrecoverable", + code.as_str() + ); + } + } + } + } + + #[test] + fn code_str_roundtrips() { + for code in ALL_CODES { + assert_eq!(FailureCode::from_str(code.as_str()), Some(code)); + } + assert_eq!(FailureCode::from_str("nonsense"), None); + } + + #[test] + fn new_fills_class_and_remediation_from_code() { + let f = PipelineFailure::new(FailureCode::BudgetExhausted); + assert_eq!(f.code, FailureCode::BudgetExhausted); + assert_eq!(f.class, FailureClass::Unrecoverable); + assert_eq!( + f.remediation_key, + "memory.health.remediation.budget_exhausted" + ); + assert!(f.detail.is_none()); + assert!(f.is_unrecoverable()); + } + + #[test] + fn with_detail_and_display() { + let f = PipelineFailure::new(FailureCode::Transient).with_detail("HTTP 503"); + assert_eq!(f.detail.as_deref(), Some("HTTP 503")); + assert!(!f.is_unrecoverable()); + assert_eq!(f.to_string(), "transient (transient): HTTP 503"); + } + + #[test] + fn pipeline_failure_serde_roundtrips() { + let f = PipelineFailure::new(FailureCode::EmbeddingDimMismatch).with_detail("got 3072"); + let json = serde_json::to_string(&f).unwrap(); + let back: PipelineFailure = serde_json::from_str(&json).unwrap(); + assert_eq!(f, back); + // detail omitted when None. 
+ let none = PipelineFailure::new(FailureCode::AuthMissing); + assert!(!serde_json::to_string(&none).unwrap().contains("detail")); + } + + #[test] + fn degraded_state_default_is_healthy() { + let d = DegradedState::default(); + assert!(!d.is_degraded()); + let d2 = DegradedState { + structure: true, + ..Default::default() + }; + assert!(d2.is_degraded()); + } + + #[test] + fn pipeline_failure_is_error_and_downcasts_from_anyhow() { + let err: anyhow::Error = + anyhow::Error::new(PipelineFailure::new(FailureCode::BudgetExhausted)); + let downcast = err.downcast_ref::(); + assert!(downcast.is_some()); + assert!(downcast.unwrap().is_unrecoverable()); + } + + // ── classify_embed_error (T008) ────────────────────────────────────── + + #[test] + fn classify_budget_from_body_wording() { + // The managed Voyage route surfaces budget exhaustion in the body. + let f = classify_embed_error_str( + "Embedding API error (400 Bad Request): {\"error\":\"Insufficient budget\"}", + ); + assert_eq!(f.code, FailureCode::BudgetExhausted); + assert!(f.is_unrecoverable()); + } + + #[test] + fn classify_budget_from_402_and_429() { + for status in ["402 Payment Required", "429 Too Many Requests"] { + let f = classify_embed_error_str(&format!("Embedding API error ({status}): nope")); + assert_eq!( + f.code, + FailureCode::BudgetExhausted, + "status {status} should map to budget_exhausted" + ); + } + } + + #[test] + fn classify_auth_from_401_403() { + for status in ["401 Unauthorized", "403 Forbidden"] { + let f = classify_embed_error_str(&format!("Embedding API error ({status}): denied")); + assert_eq!(f.code, FailureCode::AuthInvalid, "status {status}"); + assert!(f.is_unrecoverable()); + } + } + + #[test] + fn classify_dim_mismatch() { + let f = classify_embed_error_str("cloud embedder returned 3072 dims, expected 1024"); + assert_eq!(f.code, FailureCode::EmbeddingDimMismatch); + assert!(f.is_unrecoverable()); + } + + #[test] + fn classify_5xx_is_transient() { + let f = 
classify_embed_error_str("Embedding API error (503 Service Unavailable): retry"); + assert_eq!(f.code, FailureCode::Transient); + assert!(!f.is_unrecoverable()); + } + + #[test] + fn classify_transport_error_is_transient() { + let f = classify_embed_error_str("error sending request for url (...): connection reset"); + assert_eq!(f.code, FailureCode::Transient); + assert!(!f.is_unrecoverable()); + } + + #[test] + fn classify_through_anyhow_context_chain() { + // The embed error is commonly `.context()`-wrapped on the way up; + // the flattened `{err:#}` must still classify. + let base = anyhow::anyhow!("Embedding API error (402 Payment Required): out of budget"); + let wrapped = base + .context("cloud embeddings failed") + .context("seal embed"); + let f = classify_embed_error(&wrapped); + assert_eq!(f.code, FailureCode::BudgetExhausted); + } + + #[test] + fn parse_http_status_extracts_leading_code() { + assert_eq!( + parse_http_status("Embedding API error (402 Payment Required): x"), + Some(402) + ); + assert_eq!(parse_http_status("no parens here"), None); + assert_eq!(parse_http_status("(not a status): x"), None); + } + + #[test] + fn truncate_detail_caps_length() { + let long = "x".repeat(500); + let out = truncate_detail(&long); + assert!(out.chars().count() <= 201, "got {}", out.chars().count()); + assert!(out.ends_with('…')); + } + + /// Regression (CodeRabbit): per-flag causes. Mark recall, then structure, + /// then clear structure — recall must still report its OWN cause, not the + /// (now-cleared) structure cause. With the old single shared slot this + /// surfaced the wrong remediation. + #[test] + fn degraded_cause_is_per_flag_not_shared() { + let _g = test_guard(); // resets both flags + causes + + // Recall degraded for embeddings reason; structure degraded for extraction. 
+ mark_semantic_recall_degraded(FailureCode::EmbeddingsUnconfigured); + mark_structure_degraded(FailureCode::ExtractionTimeout); + + // Structure takes precedence while both are active. + let s = current_degraded_state(); + assert!(s.semantic_recall && s.structure); + assert_eq!( + s.cause.as_ref().map(|c| c.code), + Some(FailureCode::ExtractionTimeout) + ); + + // Clear structure — recall stays, and its cause must be the RECALL one, + // not the cleared structure cause. + clear_structure_degraded(); + let s = current_degraded_state(); + assert!(s.semantic_recall && !s.structure); + assert_eq!( + s.cause.as_ref().map(|c| c.code), + Some(FailureCode::EmbeddingsUnconfigured), + "recall must keep its own cause after structure clears" + ); + + // Clear recall too — fully healthy, no cause. + clear_semantic_recall_degraded(); + let s = current_degraded_state(); + assert!(!s.is_degraded()); + assert!(s.cause.is_none()); + } +} diff --git a/src/openhuman/memory_tree/mod.rs b/src/openhuman/memory_tree/mod.rs index a74a72d0e9..145de6e6a3 100644 --- a/src/openhuman/memory_tree/mod.rs +++ b/src/openhuman/memory_tree/mod.rs @@ -5,6 +5,7 @@ //! It is flavor-agnostic; the specific tree instances (global, topic, //! source) and their policies live in [`crate::openhuman::memory`]. +pub mod health; pub mod ingest; pub mod io; pub mod retrieval; diff --git a/src/openhuman/memory_tree/retrieval/integration_tests.rs b/src/openhuman/memory_tree/retrieval/integration_tests.rs index 60b03d39e2..226b34e066 100644 --- a/src/openhuman/memory_tree/retrieval/integration_tests.rs +++ b/src/openhuman/memory_tree/retrieval/integration_tests.rs @@ -29,6 +29,13 @@ fn test_config() -> (TempDir, Config) { cfg.memory_tree.embedding_endpoint = None; cfg.memory_tree.embedding_model = None; cfg.memory_tree.embedding_strict = false; + // #002 (FR-002): the write path now SKIPS embedding (returns None) when no + // provider is configured, instead of silently using a zero-vector inert + // embedder. 
These integration tests assert embeddings ARE populated + // end-to-end, so opt into the inert embedder explicitly — `provider=none` + // is the deterministic "vector search by choice" path that + // `build_write_embedder` returns as Some(inert). + cfg.embeddings_provider = Some("none".into()); (tmp, cfg) } diff --git a/src/openhuman/memory_tree/score/embed/factory.rs b/src/openhuman/memory_tree/score/embed/factory.rs index 71ea9c2603..e4a58eb9ac 100644 --- a/src/openhuman/memory_tree/score/embed/factory.rs +++ b/src/openhuman/memory_tree/score/embed/factory.rs @@ -58,83 +58,191 @@ fn cloud_session_available(config: &Config) -> bool { /// per call — cheap because `OllamaEmbedder` owns a cloned `reqwest::Client` /// internally and `InertEmbedder` is a ZST. pub fn build_embedder_from_config(config: &Config) -> Result> { + // Read path: walk the shared ladder, then terminate at InertEmbedder (zero + // vectors) so retrieval / semantic rerank can still run with no provider. + Ok(match resolve_embedder_choice(config)? 
{ + EmbedderChoice::Ollama { + endpoint, + model, + timeout_ms, + } => { + log::debug!( + "[memory_tree::embed::factory] read → Ollama endpoint={endpoint} model={model} timeout_ms={timeout_ms}" + ); + Box::new(OllamaEmbedder::new(endpoint, model, timeout_ms)) + } + EmbedderChoice::OptOut => { + log::info!( + "[memory_tree::embed::factory] embeddings_provider=none — \ + using InertEmbedder (vector search disabled)" + ); + Box::new(InertEmbedder::new()) + } + EmbedderChoice::OpenAiCompat(openai) => { + log::debug!( + "[memory_tree::embed::factory] read → user OpenAI-compatible embeddings ({})", + openai.name() + ); + Box::new(openai) + } + EmbedderChoice::Cloud => { + log::debug!( + "[memory_tree::embed::factory] read → cloud (Voyage) — flip \ + 'Memory embeddings' in Local AI Settings to switch to local" + ); + Box::new(CloudEmbedder::new(config)) + } + EmbedderChoice::NoProvider => { + log::warn!( + "[memory_tree::embed::factory] no backend session found — \ + using InertEmbedder (zero vectors). Log in to OpenHuman, or \ + enable 'Memory embeddings' in Local AI Settings, to fix." + ); + Box::new(InertEmbedder::new()) + } + }) +} + +/// The embedder the resolution ladder selects, independent of whether the +/// caller is a read path (retrieval) or a write path (ingest/seal). Both +/// public factories walk [`resolve_embedder_choice`] and differ ONLY at the +/// terminal + degraded-flag side-effects — so "identical resolution for every +/// real provider" is a structural guarantee, not two hand-maintained copies +/// that could drift (reviewer sanil-23, #3076: a read/write provider mismatch +/// would silently corrupt recall). +enum EmbedderChoice { + /// Explicit Ollama override, or the unified `ollama:` workload setting. + Ollama { + endpoint: String, + model: String, + timeout_ms: u64, + }, + /// `embeddings_provider = "none"` — vector search off by deliberate user + /// choice (NOT a degradation). Both paths use `InertEmbedder`. 
+ OptOut, + /// User-configured OpenAI / custom OpenAI-compatible endpoint (#002 FR-015). + OpenAiCompat(super::openai_compat::OpenAiCompatEmbedder), + /// Logged-in managed cloud (Voyage). + Cloud, + /// No usable provider. Read path → `InertEmbedder` (zero vectors); write + /// path → `None` (skip) + mark `semantic_recall` degraded. + NoProvider, +} + +/// Walk the provider-resolution ladder once. The order is the single source of +/// truth for both factories; the only read/write differences are encoded by the +/// callers at the terminal, never here. +fn resolve_embedder_choice(config: &Config) -> Result { let tree_cfg = &config.memory_tree; - match ( + + // 1. Explicit Ollama override (power-user / E2E rig). + if let (Some(endpoint), Some(model)) = ( tree_cfg.embedding_endpoint.as_deref(), tree_cfg.embedding_model.as_deref(), ) { - (Some(endpoint), Some(model)) - if !endpoint.trim().is_empty() && !model.trim().is_empty() => - { - let timeout_ms = tree_cfg.embedding_timeout_ms.unwrap_or(0); - log::debug!( - "[memory_tree::embed::factory] using Ollama endpoint={} model={} timeout_ms={}", - endpoint, - model, - timeout_ms + if !endpoint.trim().is_empty() && !model.trim().is_empty() { + return Ok(EmbedderChoice::Ollama { + endpoint: endpoint.to_string(), + model: model.to_string(), + timeout_ms: tree_cfg.embedding_timeout_ms.unwrap_or(0), + }); + } + } + + // 2. Deliberate opt-out — vector search off by user choice. + if config + .embeddings_provider + .as_deref() + .map(|s| s.trim()) + .is_some_and(|s| s == "none") + { + return Ok(EmbedderChoice::OptOut); + } + + // 3. Local Ollama via the unified workload setting. + if let Some(model) = config.workload_local_model("embeddings") { + return Ok(EmbedderChoice::Ollama { + endpoint: ollama_base_url(), + model, + timeout_ms: tree_cfg.embedding_timeout_ms.unwrap_or(0), + }); + } + + // 4. #002 FR-015: user-configured OpenAI / custom OpenAI-compatible. 
+ if let Some(openai) = super::openai_compat::OpenAiCompatEmbedder::try_from_config(config)? { + return Ok(EmbedderChoice::OpenAiCompat(openai)); + } + + // 5. Logged-in managed cloud (Voyage). + if cloud_session_available(config) { + return Ok(EmbedderChoice::Cloud); + } + + // 6. Nothing usable. + Ok(EmbedderChoice::NoProvider) +} + +/// Build the embedder used by **write** paths (ingest extract + seal), with an +/// explicit "no usable embedder" signal (#002 FR-002). +/// +/// Identical resolution to [`build_embedder_from_config`] for every real +/// provider (explicit Ollama override, local Ollama, cloud session). The one +/// difference is the terminal fallback: where the read-path factory returns an +/// [`InertEmbedder`] (zero vectors) so retrieval can still run, the write path +/// returns **`Ok(None)`** so callers **skip** embedding instead of persisting a +/// fake all-zero vector that would silently poison semantic recall and present +/// a degraded result as success. The chunk/summary is written embedding-less +/// (re-embeddable later once a provider is configured), and the process-global +/// `semantic_recall` degraded flag is set with a typed cause so the status / +/// doctor surface can name the fix. +/// +/// `embeddings_provider = "none"` is treated as a deliberate opt-out, not a +/// degradation: it returns the [`InertEmbedder`] (vector search intentionally +/// off) without setting the degraded flag — same as the read path. +pub fn build_write_embedder(config: &Config) -> Result>> { + use crate::openhuman::memory_tree::health::{ + clear_semantic_recall_degraded, mark_semantic_recall_degraded, FailureCode, + }; + + // Write path: same ladder as the read factory, terminating at `None` (skip, + // don't persist zero vectors) + a typed degraded flag when no provider is + // usable. Every real-provider branch clears the flag; the deliberate + // "none" opt-out leaves it untouched (off by choice, not degradation). 
+ Ok(match resolve_embedder_choice(config)? { + EmbedderChoice::Ollama { + endpoint, + model, + timeout_ms, + } => { + clear_semantic_recall_degraded(); + Some(Box::new(OllamaEmbedder::new(endpoint, model, timeout_ms))) + } + EmbedderChoice::OptOut => { + clear_semantic_recall_degraded(); + log::info!( + "[memory_tree::embed::factory] embeddings_provider=none — write path \ + uses InertEmbedder (vector search disabled by choice)" ); - Ok(Box::new(OllamaEmbedder::new( - endpoint.to_string(), - model.to_string(), - timeout_ms, - ))) + Some(Box::new(InertEmbedder::new())) } - _ => { - // If the user explicitly disabled embeddings, return InertEmbedder - // so semantic rerank degrades to recency-only ordering. - if config - .embeddings_provider - .as_deref() - .map(|s| s.trim()) - .is_some_and(|s| s == "none") - { - log::info!( - "[memory_tree::embed::factory] embeddings_provider=none — \ - using InertEmbedder (vector search disabled)" - ); - return Ok(Box::new(InertEmbedder::new())); - } - - // Honour the unified AI settings: `embeddings_provider` is the - // single source of truth. When it parses as `ollama:` we - // route locally; otherwise we fall back to the cloud session. - if let Some(model) = config.workload_local_model("embeddings") { - let endpoint = ollama_base_url(); - let timeout_ms = tree_cfg.embedding_timeout_ms.unwrap_or(0); - log::debug!( - "[memory_tree::embed::factory] embeddings_provider=ollama:{} — using local Ollama endpoint={} timeout_ms={}", - model, endpoint, timeout_ms - ); - Ok(Box::new(OllamaEmbedder::new(endpoint, model, timeout_ms))) - } else if cloud_session_available(config) { - // Default for logged-in users: cloud (OpenHuman backend / - // Voyage `voyage-3.5`, 1024 dims). Matches the main - // embeddings path so a fresh install needs zero local - // Ollama setup. JWT failures (expired, invalid, etc.) - // surface as embed-call errors so ingest's existing - // retry-with-backoff logic handles them. 
- log::debug!( - "[memory_tree::embed::factory] using cloud (Voyage) — \ - flip 'Memory embeddings' in Local AI Settings to switch to local" - ); - Ok(Box::new(CloudEmbedder::new(config))) - } else { - // Pre-login, test harness, or unauthenticated runtime - // path — no auth-profiles.json on disk means the cloud - // path has no chance of resolving a bearer. Drop to - // InertEmbedder (zero vectors) so ingest/seal/retrieval - // can run without panic; semantic rerank degrades to - // recency only until the user logs in (or until they - // flip "Memory embeddings" to local with Ollama running). - log::warn!( - "[memory_tree::embed::factory] no backend session found — \ - using InertEmbedder (zero vectors). Log in to OpenHuman, or \ - enable 'Memory embeddings' in Local AI Settings, to fix." - ); - Ok(Box::new(InertEmbedder::new())) - } + EmbedderChoice::OpenAiCompat(openai) => { + clear_semantic_recall_degraded(); + Some(Box::new(openai)) } - } + EmbedderChoice::Cloud => { + clear_semantic_recall_degraded(); + Some(Box::new(CloudEmbedder::new(config))) + } + EmbedderChoice::NoProvider => { + log::warn!( + "[memory_tree::embed::factory] no usable embeddings provider — skipping \ + embedding (chunk persists embedding-less, re-embeddable later). Set up \ + local Ollama embeddings or log in to OpenHuman to enable semantic recall." + ); + mark_semantic_recall_degraded(FailureCode::EmbeddingsUnconfigured); + None + } + }) } #[cfg(test)] @@ -175,6 +283,102 @@ mod tests { assert_eq!(e.name(), "ollama"); } + // ── build_write_embedder (T010, #002 FR-002) ───────────────────────── + // + // These assert the write-path factory's "skip vs embed" contract. The + // degraded flag is a process-global atomic, so the flag-sensitive tests + // serialize on a shared mutex to avoid stomping each other under cargo's + // parallel test runner. 
+ // Delegate to the health module's shared guard so factory tests serialise + // against the rpc/extract tests that touch the SAME process-global flags + // (a factory-local mutex would only serialise within this module, leaving + // a cross-module race). The guard also resets the flags on entry. + fn degraded_flag_lock() -> std::sync::MutexGuard<'static, ()> { + crate::openhuman::memory_tree::health::test_guard() + } + + #[test] + fn write_embedder_none_when_no_provider_and_marks_degraded() { + use crate::openhuman::memory_tree::health::{ + clear_semantic_recall_degraded, current_degraded_state, FailureCode, + }; + let _guard = degraded_flag_lock(); + clear_semantic_recall_degraded(); + let (_tmp, mut cfg) = test_config(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + // No auth-profiles.json, no local workload model → no usable provider. + let e = build_write_embedder(&cfg).expect("factory must not error"); + assert!( + e.is_none(), + "no provider → skip embedding (None), not inert" + ); + let d = current_degraded_state(); + assert!( + d.semantic_recall, + "semantic recall must be flagged degraded" + ); + assert_eq!( + d.cause.map(|c| c.code), + Some(FailureCode::EmbeddingsUnconfigured) + ); + clear_semantic_recall_degraded(); + } + + #[test] + fn write_embedder_some_cloud_with_session_and_clears_degraded() { + use crate::openhuman::memory_tree::health::{ + current_degraded_state, mark_semantic_recall_degraded, FailureCode, + }; + let _guard = degraded_flag_lock(); + // Pretend a prior run left recall degraded; a working provider clears it. 
+ mark_semantic_recall_degraded(FailureCode::EmbeddingsUnconfigured); + let (_tmp, mut cfg) = test_config(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + touch_auth_profile(&cfg); + let e = build_write_embedder(&cfg) + .expect("factory must not error") + .expect("cloud session → Some(embedder)"); + assert_eq!(e.name(), "cloud"); + assert!( + !current_degraded_state().semantic_recall, + "a usable provider must clear the degraded flag" + ); + } + + #[test] + fn write_embedder_some_ollama_override() { + let (_tmp, mut cfg) = test_config(); + cfg.memory_tree.embedding_endpoint = Some("http://localhost:11434".into()); + cfg.memory_tree.embedding_model = Some("bge-m3".into()); + let e = build_write_embedder(&cfg) + .expect("factory must not error") + .expect("override → Some(embedder)"); + assert_eq!(e.name(), "ollama"); + } + + #[test] + fn write_embedder_none_provider_is_inert_not_skip() { + use crate::openhuman::memory_tree::health::{ + clear_semantic_recall_degraded, current_degraded_state, + }; + let _guard = degraded_flag_lock(); + clear_semantic_recall_degraded(); + let (_tmp, mut cfg) = test_config(); + cfg.embeddings_provider = Some("none".into()); + // Deliberate opt-out → InertEmbedder (vector search off by choice), + // and NOT flagged as a degradation. 
+ let e = build_write_embedder(&cfg) + .expect("factory must not error") + .expect("provider=none → Some(inert), not skip"); + assert_eq!(e.name(), "inert"); + assert!( + !current_degraded_state().semantic_recall, + "explicit opt-out is not a degradation" + ); + } + #[test] fn unset_endpoint_with_session_routes_to_cloud() { let (_tmp, mut cfg) = test_config(); @@ -265,6 +469,57 @@ mod tests { assert_eq!(e.name(), "inert"); } + #[test] + fn write_embedder_routes_to_openai_when_memory_provider_is_openai() { + // #002 FR-015 regression: the headline bug was that a user-configured + // OpenAI embeddings provider (`config.memory.embedding_provider = + // "openai"`) matched no factory branch and silently fell through to the + // managed-budget backend. Lock the routing in at the FACTORY level — + // `openai_compat`'s own tests only cover `try_from_config` in isolation, + // so a factory refactor could re-break this with those tests still green. + // + // Note the two distinct config fields the factory reads: the top-level + // `embeddings_provider` (here unset, so the "none"/`ollama:` branches do + // not match) vs `memory.embedding_provider` (the unified Embeddings- + // settings field that drives the OpenAI/custom detection). 
+ let _guard = degraded_flag_lock(); + use crate::openhuman::memory_tree::health::{ + current_degraded_state, mark_semantic_recall_degraded, FailureCode, + }; + mark_semantic_recall_degraded(FailureCode::EmbeddingsUnconfigured); + let (_tmp, mut cfg) = test_config(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + cfg.embeddings_provider = None; // top-level workload routing: unset + cfg.memory.embedding_provider = "openai".to_string(); + cfg.memory.embedding_model = "text-embedding-3-large".to_string(); + let e = build_write_embedder(&cfg) + .expect("factory must not error") + .expect("openai provider → Some(embedder), must NOT fall through to skip/cloud"); + assert_eq!( + e.name(), + "openai", + "must route to the user's OpenAI embeddings, not the managed backend" + ); + assert!( + !current_degraded_state().semantic_recall, + "a usable OpenAI provider must clear the degraded flag" + ); + } + + #[test] + fn read_embedder_routes_to_openai_when_memory_provider_is_openai() { + // Same FR-015 routing, read path (`build_embedder_from_config`). + let (_tmp, mut cfg) = test_config(); + cfg.memory_tree.embedding_endpoint = None; + cfg.memory_tree.embedding_model = None; + cfg.embeddings_provider = None; + cfg.memory.embedding_provider = "openai".to_string(); + cfg.memory.embedding_model = "text-embedding-3-large".to_string(); + let e = build_embedder_from_config(&cfg).expect("openai path should build"); + assert_eq!(e.name(), "openai"); + } + #[test] fn explicit_endpoint_override_wins_over_local_ai_flag() { // Power-user override beats the checkbox. 
diff --git a/src/openhuman/memory_tree/score/embed/mod.rs b/src/openhuman/memory_tree/score/embed/mod.rs index 3f9048be00..7f678857b1 100644 --- a/src/openhuman/memory_tree/score/embed/mod.rs +++ b/src/openhuman/memory_tree/score/embed/mod.rs @@ -33,11 +33,13 @@ pub mod cloud; pub mod factory; pub mod inert; pub mod ollama; +pub mod openai_compat; pub use cloud::CloudEmbedder; -pub use factory::build_embedder_from_config; +pub use factory::{build_embedder_from_config, build_write_embedder}; pub use inert::InertEmbedder; pub use ollama::OllamaEmbedder; +pub use openai_compat::OpenAiCompatEmbedder; /// Embedding dimensionality used across the memory tree. /// diff --git a/src/openhuman/memory_tree/score/embed/openai_compat.rs b/src/openhuman/memory_tree/score/embed/openai_compat.rs new file mode 100644 index 0000000000..9e18cd7355 --- /dev/null +++ b/src/openhuman/memory_tree/score/embed/openai_compat.rs @@ -0,0 +1,152 @@ +//! Memory-tree [`Embedder`] backed by a user-configured OpenAI-compatible +//! embeddings provider (#002 FR-015). +//! +//! ## Why this exists +//! +//! The memory-tree embedder factory historically resolved only: explicit +//! Ollama override → `ollama:` workload prefix → managed `CloudEmbedder` +//! (backend→Voyage) → skip. So a user who configured **OpenAI** (or any +//! custom OpenAI-compatible endpoint) in Settings → AI → Embeddings was +//! silently ignored: their `embeddings_provider = "openai"` matched no branch +//! and fell through to the managed backend, which then hit "managed budget" +//! while the user's own key sat unused. This adapter closes that gap. +//! +//! ## How +//! +//! It wraps the unified [`EmbeddingProvider`] built by +//! [`create_embedding_provider_with_credentials`] (the same construction the +//! Settings "Test connection" + main embed RPC use, so there is one source of +//! truth for OpenAI/custom embeddings) and adapts it to the memory-tree +//! [`Embedder`] trait. 
Dimensions are pinned to [`EMBEDDING_DIM`] (1024) — the +//! tree's on-disk format is fixed there — and the OpenAI request path now +//! sends the `dimensions` parameter (see `embeddings::openai`) so a reducible +//! model (`text-embedding-3-large`) returns 1024 instead of its native 3072. +//! A returned vector of the wrong size surfaces as the trait's standard +//! "expected N dims" error, which the worker classifies as +//! `embedding_dim_mismatch`. + +use anyhow::{Context, Result}; +use async_trait::async_trait; + +use super::{Embedder, EMBEDDING_DIM}; +use crate::openhuman::config::Config; +use crate::openhuman::embeddings::EmbeddingProvider; + +/// Adapter from the unified [`EmbeddingProvider`] to the memory-tree +/// [`Embedder`] trait for the OpenAI / custom-OpenAI providers. +pub struct OpenAiCompatEmbedder { + inner: Box, + /// Short label for logs (e.g. "openai", "custom"). + label: &'static str, +} + +impl OpenAiCompatEmbedder { + /// Try to build the adapter from the user's configured embeddings settings. + /// + /// Returns `Ok(None)` when `config.memory.embedding_provider` is **not** an + /// OpenAI-compatible provider (so the caller's resolution chain continues + /// to the next branch), and `Ok(Some(_))` when it is. Errors only on an + /// actual construction failure (which the caller can treat as + /// fail-fast-worthy). + /// + /// Always requests [`EMBEDDING_DIM`] regardless of the user's configured + /// dimensions — the tree format is fixed at 1024, and the OpenAI path now + /// honours the `dimensions` param so 3-large complies. + pub fn try_from_config(config: &Config) -> Result> { + let provider = config.memory.embedding_provider.trim(); + let (slug, label): (&str, &'static str) = if provider == "openai" { + ("openai", "openai") + } else if provider == "custom" || provider.starts_with("custom:") { + ("custom", "custom") + } else { + // Not an OpenAI-compatible provider — let the caller fall through. 
+ return Ok(None); + }; + + let model = config.memory.embedding_model.trim(); + let api_key = crate::openhuman::embeddings::resolve_api_key(config, provider); + let custom_endpoint = provider.strip_prefix("custom:"); + + let inner = crate::openhuman::embeddings::create_embedding_provider_with_credentials( + slug, + model, + EMBEDDING_DIM, + &api_key, + custom_endpoint, + ) + .with_context(|| format!("build {label} embedder for memory tree"))?; + + log::debug!( + "[memory_tree::embed::openai_compat] using {label} provider model={} dims={}", + model, + EMBEDDING_DIM + ); + Ok(Some(Self { inner, label })) + } +} + +#[async_trait] +impl Embedder for OpenAiCompatEmbedder { + fn name(&self) -> &'static str { + self.label + } + + async fn embed(&self, text: &str) -> Result> { + let v = self + .inner + .embed_one(text) + .await + .with_context(|| format!("{} embeddings failed", self.label))?; + if v.len() != EMBEDDING_DIM { + anyhow::bail!( + "{} embedder returned {} dims, expected {}", + self.label, + v.len(), + EMBEDDING_DIM + ); + } + Ok(v) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn cfg_with_provider(p: &str) -> (TempDir, Config) { + let tmp = TempDir::new().unwrap(); + let mut cfg = Config::default(); + cfg.workspace_dir = tmp.path().to_path_buf(); + cfg.config_path = tmp.path().join("config.toml"); + cfg.memory.embedding_provider = p.to_string(); + cfg.memory.embedding_model = "text-embedding-3-large".to_string(); + (tmp, cfg) + } + + #[test] + fn none_for_non_openai_providers() { + // managed / voyage / ollama / none must fall through (Ok(None)). 
+ for p in ["managed", "cloud", "voyage", "ollama:bge-m3", "none"] { + let (_tmp, cfg) = cfg_with_provider(p); + let got = OpenAiCompatEmbedder::try_from_config(&cfg).expect("no error"); + assert!(got.is_none(), "{p} should fall through, got Some"); + } + } + + #[test] + fn some_for_openai() { + let (_tmp, cfg) = cfg_with_provider("openai"); + let got = OpenAiCompatEmbedder::try_from_config(&cfg).expect("no error"); + let e = got.expect("openai should build an adapter"); + assert_eq!(e.name(), "openai"); + } + + #[test] + fn some_for_custom() { + let (_tmp, cfg) = cfg_with_provider("custom:https://embed.example/v1"); + let got = OpenAiCompatEmbedder::try_from_config(&cfg).expect("no error"); + let e = got.expect("custom should build an adapter"); + assert_eq!(e.name(), "custom"); + } +} diff --git a/src/openhuman/memory_tree/score/extract/llm.rs b/src/openhuman/memory_tree/score/extract/llm.rs index 78d9484852..69adc55672 100644 --- a/src/openhuman/memory_tree/score/extract/llm.rs +++ b/src/openhuman/memory_tree/score/extract/llm.rs @@ -149,7 +149,18 @@ impl EntityExtractor for LlmEntityExtractor { for attempt in 0..MAX_ATTEMPTS { match self.try_extract(text).await { - Some(extracted) => return Ok(extracted), + Some(extracted) => { + // #002 (T013): a completed extraction that yielded + // structure means the extraction model is working — + // clear any prior "structure degraded" flag so the + // status/doctor surface recovers. (An empty-but-valid + // result, e.g. genuinely entity-free text, is left + // alone — it isn't evidence the model is broken.) + if !extracted.entities.is_empty() || !extracted.topics.is_empty() { + crate::openhuman::memory_tree::health::clear_structure_degraded(); + } + return Ok(extracted); + } None => { // Transport failure. Retry with exponential backoff // unless we've exhausted attempts. 
@@ -167,11 +178,21 @@ impl EntityExtractor for LlmEntityExtractor { } } + // #002 (T013): every attempt hit a transport failure (the model + // timed out / was unreachable). The soft-fallback contract still + // returns empty (ingest never blocks on a slow model), but we now + // record a structure-degraded signal with a typed cause so the + // status/doctor surface can say "extraction is timing out — switch + // the Memory extraction model" instead of presenting an empty wiki + // as success. log::warn!( "[memory_tree::extract::llm] transport failed after {} attempts — \ - returning empty extraction", + returning empty extraction (structure degraded)", MAX_ATTEMPTS ); + crate::openhuman::memory_tree::health::mark_structure_degraded( + crate::openhuman::memory_tree::health::FailureCode::ExtractionTimeout, + ); Ok(ExtractedEntities::default()) } } diff --git a/src/openhuman/memory_tree/score/extract/llm_tests.rs b/src/openhuman/memory_tree/score/extract/llm_tests.rs index 4e6d6a2e20..b6e29ee33b 100644 --- a/src/openhuman/memory_tree/score/extract/llm_tests.rs +++ b/src/openhuman/memory_tree/score/extract/llm_tests.rs @@ -193,6 +193,10 @@ async fn extract_soft_fallback_on_provider_failure() { // Provider always errors. extract() must NOT return Err — it must // return an empty ExtractedEntities with a warn log after retry // exhaustion. + // #002: this path now sets the process-global "structure degraded" flag. + // Hold the shared health test-guard so the flag is reset on entry and the + // signal doesn't leak into parallel status tests. 
+ let _health_guard = crate::openhuman::memory_tree::health::test_guard(); use crate::openhuman::memory::chat::{ChatPrompt, ChatProvider}; use async_trait::async_trait; use std::sync::Arc; diff --git a/src/openhuman/memory_tree/tree/bucket_seal.rs b/src/openhuman/memory_tree/tree/bucket_seal.rs index 4adb3170cf..b7d75a770f 100644 --- a/src/openhuman/memory_tree/tree/bucket_seal.rs +++ b/src/openhuman/memory_tree/tree/bucket_seal.rs @@ -46,7 +46,7 @@ use crate::openhuman::memory_store::content::{atomic::stage_summary, SummaryComp use crate::openhuman::memory_store::trees::types::{ Buffer, SummaryNode, Tree, TreeKind, INPUT_TOKEN_BUDGET, OUTPUT_TOKEN_BUDGET, SUMMARY_FANOUT, }; -use crate::openhuman::memory_tree::score::embed::build_embedder_from_config; +use crate::openhuman::memory_tree::score::embed::build_write_embedder; use crate::openhuman::memory_tree::score::extract::EntityExtractor; use crate::openhuman::memory_tree::score::resolver::canonicalise; use crate::openhuman::memory_tree::summarise::{ @@ -446,39 +446,66 @@ pub(crate) async fn seal_one_level( }, ); - let embedding: Option> = match build_embedder_from_config(config) { - Ok(embedder) => { - let embed_input = truncate_for_embed(&output.content, 1_000); - log::info!( - "[tree::bucket_seal] embed input: original_chars={} truncated_chars={}", - output.content.len(), - embed_input.len() + // Conservative cap. Slack-style chat content (URLs, mentions, + // emoji) tokenizes 2-4× higher than the 4-chars/token heuristic. + // 1000 approx-tokens (~4000 chars) is comfortably under 8192 + // even at 4× tokenizer ratio. + let embed_input = truncate_for_embed(&output.content, 1_000); + log::info!( + "[tree::bucket_seal] embed input: original_chars={} truncated_chars={}", + output.content.len(), + embed_input.len() + ); + // #002 (FR-002): skip embedding when no usable provider is configured + // (build_write_embedder returns None) rather than writing a fake all-zero + // vector. 
The summary is sealed embedding-less (re-embeddable later) and + // the semantic-recall degraded flag is already set with a typed cause. + let embedding: Option> = match build_write_embedder(config) + .context("build embedder during seal")? + { + None => { + log::warn!( + "[tree::bucket_seal] embeddings unavailable for tree_id={} level={}→{} \ + — sealing summary without embedding (semantic recall degraded)", + tree.id, + level, + target_level ); - match embedder.embed(&embed_input).await { - Ok(vector) => { - log::debug!( - "[tree::bucket_seal] embedded summary tree_id={} level={}→{} provider={}", - tree.id, - level, - target_level, - embedder.name() - ); - Some(vector) - } + None + } + Some(embedder) => { + let v = match embedder.embed(&embed_input).await { + Ok(v) => v, Err(e) => { - log::warn!( - "[tree::bucket_seal] embed failed during seal tree_id={} level={}: {e:#} — sealing without embedding", + // #002: classify so the seal job fails fast on + // unrecoverable embed causes (budget/auth/dim) with a + // typed reason instead of retrying; original chain + // preserved as context. + let failure = crate::openhuman::memory_tree::health::classify_embed_error(&e); + return Err(anyhow::Error::new(failure).context(format!( + "embed summary during seal tree_id={} level={}: {e:#}", tree.id, level - ); - None + ))); } - } - } - Err(e) => { - log::warn!( - "[tree::bucket_seal] build embedder failed during seal: {e:#} — sealing without embedding" + }; + // Dimension guard: reject wrong-dimensionality vectors before + // they reach the store — same contract as handle_extract's + // pack_checked. Without this a provider returning the wrong + // shape slips into the summary sidecar silently. 
+ crate::openhuman::memory_tree::score::embed::pack_checked(&v).context(format!( + "seal embed dim check tree_id={} level={}", + tree.id, level + ))?; + log::debug!( + "[tree::bucket_seal] embedded summary tree_id={} level={}→{} bytes={} provider={}", + tree.id, + level, + target_level, + output.content.len(), + embedder.name() ); - None + crate::openhuman::memory_tree::health::clear_semantic_recall_degraded(); + Some(v) } }; diff --git a/src/openhuman/memory_tree/tree/bucket_seal_tests.rs b/src/openhuman/memory_tree/tree/bucket_seal_tests.rs index f27cc83ad6..e03452ba0c 100644 --- a/src/openhuman/memory_tree/tree/bucket_seal_tests.rs +++ b/src/openhuman/memory_tree/tree/bucket_seal_tests.rs @@ -41,6 +41,12 @@ fn test_config() -> (TempDir, Config) { cfg.memory_tree.embedding_endpoint = None; cfg.memory_tree.embedding_model = None; cfg.memory_tree.embedding_strict = false; + // #002: opt into the deterministic inert embedder via `provider="none"`. + // This is `Some(inert)` (vector search off by choice) and does NOT set the + // process-global semantic-recall degraded flag — unlike the no-provider + // path, which marks degraded and would leak that signal into parallel + // `pipeline_status` tests. + cfg.embeddings_provider = Some("none".into()); (tmp, cfg) } diff --git a/src/openhuman/memory_tree/tree/rpc.rs b/src/openhuman/memory_tree/tree/rpc.rs index ff2c5e5668..868ff24fb4 100644 --- a/src/openhuman/memory_tree/tree/rpc.rs +++ b/src/openhuman/memory_tree/tree/rpc.rs @@ -7,6 +7,7 @@ //! - `openhuman.memory_tree_list_chunks` — listing with filters. //! - `openhuman.memory_tree_get_chunk` — single chunk fetch. +use rusqlite::OptionalExtension; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -273,13 +274,14 @@ pub struct PipelineJobCounts { /// active sync, failed > 0 implies degraded. 
#[derive(Clone, Debug, Serialize, Deserialize)] pub struct PipelineStatusResponse { - /// Aggregated status string: `running` | `paused` | `syncing` | `error` - /// | `idle`. Derivation: + /// Aggregated status string: `running` | `paused` | `syncing` | + /// `degraded` | `error` | `idle`. Derivation: /// 1. `is_paused` (scheduler-gate `off`) wins → `paused`. /// 2. otherwise failed > 0 → `error`. - /// 3. otherwise running > 0 → `syncing`. - /// 4. otherwise total_chunks > 0 → `running`. - /// 5. otherwise → `idle`. + /// 3. otherwise degraded with total_chunks > 0 (#002) → `degraded`. + /// 4. otherwise running > 0 → `syncing`. + /// 5. otherwise total_chunks > 0 → `running`. + /// 6. otherwise → `idle`. pub status: String, /// Optional human-readable reason — populated when status is /// `paused` or `error`. `None` otherwise. @@ -301,6 +303,30 @@ pub struct PipelineStatusResponse { /// Convenience flag: scheduler-gate is in `off` mode, so all LLM-bound /// background work is paused cooperatively. pub is_paused: bool, + /// #002 (FR-002/FR-004): "the pipeline ran but output quality is reduced" + /// — `semantic_recall` true when embeddings were skipped (no usable + /// provider, so recall falls back to recency), `structure` true when + /// extraction yielded nothing across the board (empty wiki). Carries the + /// typed `cause` so the UI can render an actionable remediation. Additive: + /// `#[serde(default)]` keeps older clients deserialising the response. + #[serde(default)] + pub degraded: crate::openhuman::memory_tree::health::DegradedState, + /// #002 (FR-004): the single first blocking/most-significant cause, as a + /// typed failure with an i18n remediation key. Populated from a failed + /// job's classified reason or the active degradation cause; `None` when + /// the pipeline is healthy. The frontend renders this verbatim (resolving + /// `remediation_key`) instead of re-deriving a cause from raw counters.
+ #[serde(default)] + pub first_blocking_cause: Option, + /// #002 (FR-010 / US5): fraction of chunks with ≥1 indexed entity, in + /// `[0.0, 1.0]`. Near 0 with `total_chunks > 0` means extraction is + /// producing no structure (the "empty-but-built wiki"). `None` when the + /// metric could not be measured (DB read error) — deliberately distinct + /// from a genuine `Some(0.0)` so the status surface never misreports a + /// broken measurement path as a structure failure. Additive + /// (`#[serde(default)]` → `None` for older clients). + #[serde(default)] + pub extraction_coverage: Option, } /// `memory_tree_pipeline_status` RPC handler (#1856 Part 1). @@ -382,14 +408,62 @@ pub async fn pipeline_status_rpc( let is_paused = config.scheduler_gate.mode == SchedulerGateMode::Off; let is_syncing = pipeline_jobs.running > 0; + // #002: read the process-global degradation snapshot (set by the embed / + // extract stages) so a half-working sync surfaces as `degraded` with a + // cause rather than a misleading `running`. + let degraded = crate::openhuman::memory_tree::health::current_degraded_state(); + let (status, reason) = derive_pipeline_status( is_paused, config.scheduler_gate.mode, is_syncing, pipeline_jobs.failed, total_chunks, + &degraded, ); + // #002: both of these touch SQLite, so run them off the async runtime + // thread in a single blocking task (a contended DB could otherwise pin a + // Tokio worker for the busy-timeout window). Best-effort — failures degrade + // to `None` rather than failing the polled status RPC. + // - first_blocking_cause (FR-004): the most-recent failed job's typed + // reason, surfaced verbatim by the UI. + // - extraction_coverage (FR-010/US5): fraction of chunks with structure. + // `None` (not `0.0`) on a read error, so a broken measurement path is + // never mistaken for a genuine 0% extraction rate.
+ let (latest_failure, extraction_coverage) = { + let cfg = config.clone(); + tokio::task::spawn_blocking(move || { + // Log-then-drop: keep the None fallback (these reads must not fail + // the polled status RPC) but emit a grep-friendly diagnostic so a + // DB/query failure is distinguishable from "no blocking cause" / + // "metric unavailable by design". + let failure = latest_failed_job_failure(&cfg).unwrap_or_else(|e| { + log::warn!( + "[memory-tree][rpc] pipeline_status: latest_failed_job_failure read failed: {e:#}" + ); + None + }); + let coverage = crate::openhuman::memory_store::chunks::store::extraction_coverage(&cfg) + .map_err(|e| { + log::warn!( + "[memory-tree][rpc] pipeline_status: extraction_coverage read failed: {e:#}" + ); + }) + .ok(); + (failure, coverage) + }) + .await + .unwrap_or_else(|e| { + log::warn!("[memory-tree][rpc] pipeline_status: ancillary metrics join error: {e:#}"); + (None, None) + }) + }; + + // A hard failed-job reason is more urgent than a soft degradation; fall + // back to the active degradation cause, then `None` when healthy. + let first_blocking_cause = latest_failure.or_else(|| degraded.cause.clone()); + let payload = PipelineStatusResponse { status: status.clone(), reason: reason.clone(), @@ -399,6 +473,9 @@ pub async fn pipeline_status_rpc( pipeline_jobs, is_syncing, is_paused, + degraded, + first_blocking_cause, + extraction_coverage, }; log::debug!( @@ -416,6 +493,104 @@ pub async fn pipeline_status_rpc( )) } +/// `memory_tree_doctor` RPC handler (#002 FR-009). Runs the one-shot +/// pipeline diagnostic and returns the [`DoctorReport`] — per-stage health, +/// the first blocking cause, the degraded snapshot, and counters. Exposed for +/// the agent tool + CLI so the agent can self-diagnose an empty/stalled wiki. +/// Awaits `health::async_run_doctor`, which keeps the doctor's blocking +/// SQLite reads off the async runtime thread.
+pub async fn doctor_rpc( + config: &Config, +) -> Result, String> { + // Offload the doctor's blocking SQLite reads off the async runtime thread. + let report = crate::openhuman::memory_tree::health::async_run_doctor(config).await; + let summary = if report.healthy { + "memory_tree: doctor — healthy".to_string() + } else { + format!( + "memory_tree: doctor — first_blocking_cause={}", + report + .first_blocking_cause + .as_ref() + .map(|f| f.code.as_str()) + .unwrap_or("unknown") + ) + }; + Ok(RpcOutcome::single_log(report, summary)) +} + +/// Response from `memory_tree_retry_failed` (#002 FR-011). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RetryFailedResponse { + /// Number of `failed` jobs flipped back to `ready` for retry. + pub requeued: u64, +} + +/// `memory_tree_retry_failed` RPC handler (#002 FR-011). Flips every +/// terminally-`failed` `mem_tree_jobs` row back to `ready` (fresh attempt +/// budget, typed reason cleared) so jobs that failed under a now-fixed config +/// re-run without re-ingesting source data. Backs the "Retry failed" button. +pub async fn retry_failed_rpc(config: &Config) -> Result, String> { + let cfg = config.clone(); + let requeued = tokio::task::spawn_blocking(move || { + crate::openhuman::memory_queue::store::requeue_failed(&cfg) + }) + .await + .map_err(|e| format!("retry_failed join error: {e}"))? + .map_err(|e| format!("retry_failed: {e:#}"))?; + // Wake the worker pool so the requeued jobs are picked up promptly. + crate::openhuman::memory_queue::wake_workers(); + Ok(RpcOutcome::single_log( + RetryFailedResponse { requeued }, + format!("memory_tree: retry_failed requeued={requeued}"), + )) +} + +/// #002 (FR-004): the typed [`PipelineFailure`] of the most-recently-failed +/// `mem_tree_jobs` row, when it carries a classified `failure_reason`. Returns +/// `Ok(None)` when there is no failed job with a typed reason (older failures +/// predating the typed-failure columns, or none at all). 
Best-effort: the +/// status panel is a UI convenience, so a DB error degrades to `Ok(None)` +/// rather than failing the whole status RPC. +fn latest_failed_job_failure( + config: &Config, +) -> Result, String> { + use crate::openhuman::memory_tree::health::{FailureClass, FailureCode, PipelineFailure}; + + let row: Option<(Option, Option)> = + chunk_store::with_connection(config, |conn| { + conn.query_row( + "SELECT failure_reason, failure_class FROM mem_tree_jobs + WHERE status = 'failed' AND failure_reason IS NOT NULL + ORDER BY completed_at_ms DESC LIMIT 1", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .optional() + .map_err(Into::into) + }) + .map_err(|e| format!("latest_failed_job_failure: {e:#}"))?; + + let Some((Some(reason), class)) = row else { + return Ok(None); + }; + let Some(code) = FailureCode::from_str(&reason) else { + return Ok(None); + }; + // Trust the persisted class when present and parseable; otherwise derive + // from the code (keeps a forward-compatible default if the column is NULL + // on an older row). + let mut failure = PipelineFailure::new(code); + if let Some(c) = class.as_deref() { + if c == "transient" { + failure.class = FailureClass::Transient; + } else if c == "unrecoverable" { + failure.class = FailureClass::Unrecoverable; + } + } + Ok(Some(failure)) +} + /// Recursive byte-count of files under `root`. Returns `0` when the root /// does not exist or any traversal error occurs (best-effort; the status /// panel is a UI convenience, not an audit surface). 
@@ -459,6 +634,7 @@ fn derive_pipeline_status( is_syncing: bool, failed: u64, total_chunks: u64, + degraded: &crate::openhuman::memory_tree::health::DegradedState, ) -> (String, Option) { if is_paused { return ( @@ -472,6 +648,25 @@ fn derive_pipeline_status( Some(format!("{failed} failed job(s) in pipeline")), ); } + // #002 (FR-005): "degraded" sits below error but above syncing/running — + // the pipeline is making progress, but recall/structure is reduced and the + // user should be told why. Beats syncing/running so a half-working sync + // isn't reported as plain "running"/"syncing". + // + // Only fires when there are chunks: degraded recall/structure is only + // meaningful when there's actual content affected. An empty workspace with + // a misconfigured embedder should show "idle" (nothing to recall) rather + // than "degraded" (recall is broken for existing content). + if degraded.is_degraded() && total_chunks > 0 { + let mut parts = Vec::new(); + if degraded.semantic_recall { + parts.push("semantic recall disabled"); + } + if degraded.structure { + parts.push("wiki structure incomplete"); + } + return ("degraded".to_string(), Some(parts.join("; "))); + } if is_syncing { return ("syncing".to_string(), None); } @@ -790,28 +985,74 @@ mod tests { #[test] fn derive_pipeline_status_precedence_matches_spec() { use crate::openhuman::config::SchedulerGateMode; + use crate::openhuman::memory_tree::health::{DegradedState, FailureCode, PipelineFailure}; - // paused beats everything else - let (s, reason) = derive_pipeline_status(true, SchedulerGateMode::Off, true, 5, 100); + let healthy = DegradedState::default(); + let recall_degraded = DegradedState { + semantic_recall: true, + structure: false, + cause: Some(PipelineFailure::new(FailureCode::EmbeddingsUnconfigured)), + }; + let structure_degraded = DegradedState { + semantic_recall: false, + structure: true, + cause: Some(PipelineFailure::new(FailureCode::ExtractionTimeout)), + }; + + // paused beats everything 
else (even degradation) + let (s, reason) = + derive_pipeline_status(true, SchedulerGateMode::Off, true, 5, 100, &recall_degraded); assert_eq!(s, "paused"); assert!(reason.unwrap().contains("off")); - // error beats syncing / running / idle - let (s, reason) = derive_pipeline_status(false, SchedulerGateMode::Auto, true, 2, 100); + // error beats degraded / syncing / running / idle + let (s, reason) = derive_pipeline_status( + false, + SchedulerGateMode::Auto, + true, + 2, + 100, + &recall_degraded, + ); assert_eq!(s, "error"); assert!(reason.unwrap().contains("2 failed")); - // syncing beats running / idle - let (s, reason) = derive_pipeline_status(false, SchedulerGateMode::Auto, true, 0, 100); + // #002: degraded beats syncing / running / idle (but loses to paused/error) + let (s, reason) = derive_pipeline_status( + false, + SchedulerGateMode::Auto, + true, // syncing + 0, + 100, + &recall_degraded, + ); + assert_eq!(s, "degraded", "degraded must beat syncing"); + assert!(reason.unwrap().contains("semantic recall disabled")); + + let (s, reason) = derive_pipeline_status( + false, + SchedulerGateMode::Auto, + false, + 0, + 100, + &structure_degraded, + ); + assert_eq!(s, "degraded"); + assert!(reason.unwrap().contains("wiki structure incomplete")); + + // syncing beats running / idle (when healthy) + let (s, reason) = + derive_pipeline_status(false, SchedulerGateMode::Auto, true, 0, 100, &healthy); assert_eq!(s, "syncing"); assert!(reason.is_none()); // running when chunks exist but nothing in flight - let (s, _) = derive_pipeline_status(false, SchedulerGateMode::Auto, false, 0, 100); + let (s, _) = + derive_pipeline_status(false, SchedulerGateMode::Auto, false, 0, 100, &healthy); assert_eq!(s, "running"); // idle when the store is empty and nothing is in flight - let (s, _) = derive_pipeline_status(false, SchedulerGateMode::Auto, false, 0, 0); + let (s, _) = derive_pipeline_status(false, SchedulerGateMode::Auto, false, 0, 0, &healthy); assert_eq!(s, "idle"); } @@ 
-820,6 +1061,10 @@ mod tests { /// "no memory yet" state. #[tokio::test] async fn pipeline_status_returns_idle_for_empty_store() { + // #002: the degraded flags are process-global; reset+serialise so a + // parallel test (factory None-path, extract transport-fail) can't leak + // a "degraded" signal into this fresh-workspace assertion. + let _g = crate::openhuman::memory_tree::health::test_guard(); let (_tmp, cfg) = test_config(); let out = pipeline_status_rpc(&cfg).await.unwrap().value; assert_eq!(out.status, "idle"); @@ -855,6 +1100,9 @@ mod tests { /// timestamp from `mem_tree_chunks`. #[tokio::test] async fn pipeline_status_reports_chunk_aggregates_after_ingest() { + // #002: reset+serialise the process-global degraded flags so this + // "running" assertion isn't flipped to "degraded" by a parallel test. + let _g = crate::openhuman::memory_tree::health::test_guard(); let (_tmp, cfg) = test_config(); // Seed one document so `mem_tree_chunks` is non-empty. @@ -882,7 +1130,10 @@ mod tests { "ingest must populate last_sync_ms (got {})", out.last_sync_ms ); - // No jobs running ⇒ running status, not syncing/error. + // No jobs running ⇒ running status, not syncing/error. (We hold + // `test_guard()` which resets the process-global degraded flags on + // entry and serialises against every other flag-touching test, so the + // status reflects this workspace's state, not a sibling's leak.) 
assert_eq!(out.status, "running"); assert!(!out.is_syncing); } diff --git a/src/openhuman/memory_tree/tree_runtime/cli.rs b/src/openhuman/memory_tree/tree_runtime/cli.rs index d58bb74577..b24f6f15c0 100644 --- a/src/openhuman/memory_tree/tree_runtime/cli.rs +++ b/src/openhuman/memory_tree/tree_runtime/cli.rs @@ -595,16 +595,20 @@ mod tests { } #[test] - fn run_summarize_surfaces_local_ai_requirement_before_empty_buffer_skip() { + fn run_summarize_errors_cleanly_without_provider() { + // With no local AI and no cloud opt-in (default), `run` returns a clean + // actionable error rather than panicking or giving an opaque failure. + // Users must enable local AI (Ollama) or set cloud_summarization_opt_in + // in Settings → AI → Memory (or via OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION=true). let tmp = TempDir::new().unwrap(); let _workspace = WorkspaceEnvGuard::set(tmp.path()); let err = run_summarize(&["fresh-ns".to_string()]) - .expect_err("run should still surface the local ai runtime requirement"); + .expect_err("should error without any summarization provider"); + let msg = err.to_string(); assert!( - err.to_string() - .contains("tree summarizer requires local_ai to be enabled in config"), - "unexpected run_summarize error: {err:#}" + msg.contains("no summarization provider"), + "error should name the missing provider: {msg}" ); } @@ -667,7 +671,14 @@ mod tests { } #[test] - fn run_and_rebuild_surface_local_ai_runtime_requirement() { + fn run_and_rebuild_no_longer_block_on_local_ai_precondition() { + // #002 FR-007: the summarizer used to hard-error "requires local_ai to + // be enabled" when local AI was off, which left Build Summary Trees + // dead for cloud-only setups. It now builds the configured cloud + // provider instead. The commands may still surface a downstream error + // (e.g. a network/auth failure when actually calling the cloud model in + // a test sandbox), but they must NOT fail on the old local-AI + // precondition. 
This test asserts that specific regression is gone. let tmp = TempDir::new().unwrap(); let _workspace = WorkspaceEnvGuard::set(tmp.path()); @@ -680,15 +691,19 @@ mod tests { ]) .is_ok()); - let run_err = run_summarize(&["ns".to_string()]).expect_err("run should require local ai"); - assert!(run_err - .to_string() - .contains("requires local_ai to be enabled")); - - let rebuild_err = - run_rebuild(&["ns".to_string()]).expect_err("rebuild should require local ai"); - assert!(rebuild_err - .to_string() - .contains("requires local_ai to be enabled")); + // Whatever the outcome (Ok, or a downstream provider/network error), + // it must not be the local-AI precondition error. + if let Err(e) = run_summarize(&["ns".to_string()]) { + assert!( + !e.to_string().contains("requires local_ai to be enabled"), + "run should no longer block on the local_ai precondition: {e:#}" + ); + } + if let Err(e) = run_rebuild(&["ns".to_string()]) { + assert!( + !e.to_string().contains("requires local_ai to be enabled"), + "rebuild should no longer block on the local_ai precondition: {e:#}" + ); + } } } diff --git a/src/openhuman/memory_tree/tree_runtime/engine.rs b/src/openhuman/memory_tree/tree_runtime/engine.rs index 3b3f1946ad..59f79264f1 100644 --- a/src/openhuman/memory_tree/tree_runtime/engine.rs +++ b/src/openhuman/memory_tree/tree_runtime/engine.rs @@ -33,6 +33,7 @@ const MAX_SUMMARY_CHARS: usize = 20_000 * 4; pub async fn run_summarization( config: &Config, provider: &dyn Provider, + model: &str, namespace: &str, _ts: DateTime, ) -> Result> { @@ -79,7 +80,6 @@ pub async fn run_summarization( combined }; - let model = &config.local_ai.chat_model_id; let hour_summary = summarize_to_limit( provider, &to_summarize, @@ -128,8 +128,17 @@ pub async fn run_summarization( last_hour_node = Some(hour_node); } - // Deduplicate and propagate in bottom-up order (days, months, years, root) + // Deduplicate and propagate in bottom-up order (days, months, years, root). 
+ // + // #002 (FR-008): partial success. A single node's summarization failure + // (e.g. the LLM times out on one busy day) must NOT void the entire run — + // the hour leaves are already durably written, and the other ancestors + // should still propagate. We collect per-node failures and continue + // instead of `?`-aborting on the first one; the run still succeeds, the + // failures are logged with their node ids for diagnosis. let mut seen = std::collections::HashSet::new(); + let mut failed: Vec = Vec::new(); + let mut propagated: u32 = 0; for level in [ NodeLevel::Day, NodeLevel::Month, @@ -138,24 +147,43 @@ pub async fn run_summarization( ] { for (node_id, node_level) in &all_propagation_ids { if *node_level == level && seen.insert(node_id.clone()) { - propagate_node( - config, - provider, - namespace, - node_id, - level, - &config.local_ai.chat_model_id, - ) - .await - .with_context(|| format!("propagate {node_id}"))?; + match propagate_node(config, provider, namespace, node_id, level, model).await { + Ok(()) => propagated += 1, + Err(e) => { + log::warn!( + "[tree_summarizer] propagate failed (continuing) namespace='{namespace}' \ + node={node_id} level={}: {e:#}", + level.as_str() + ); + failed.push(node_id.clone()); + } + } } } } + if !failed.is_empty() { + log::warn!( + "[tree_summarizer] partial summarization for '{namespace}': {propagated} node(s) \ + propagated, {} failed ({:?})", + failed.len(), + failed + ); + } - // All hour leaves are durably written and propagation is complete. - // Now it's safe to delete the buffer entries. - store::buffer_delete(config, namespace, &buffer_filenames) - .context("delete buffer entries after successful summarization")?; + // Only clear the buffer when propagation was fully successful. 
If any nodes + // failed, keep the buffer entries so `run_hourly_loop` re-discovers this + // namespace on the next pass and retries the failed levels — otherwise a + // transient day/month/year/root failure becomes sticky degradation. + if failed.is_empty() { + store::buffer_delete(config, namespace, &buffer_filenames) + .context("delete buffer entries after successful summarization")?; + } else { + log::info!( + "[tree_summarizer] keeping buffer for '{namespace}' — {n} failed node(s) \ + will be retried on the next run", + n = failed.len() + ); + } Ok(last_hour_node) } @@ -166,6 +194,7 @@ pub async fn run_summarization( pub async fn rebuild_tree( config: &Config, provider: &dyn Provider, + model: &str, namespace: &str, ) -> Result { tracing::debug!("[tree_summarizer] rebuilding tree for namespace '{namespace}'"); @@ -239,26 +268,55 @@ pub async fn rebuild_tree( } } - // Propagate bottom-up: days, then months, then years, then root - let model = &config.local_ai.chat_model_id; + // Propagate bottom-up: days, then months, then years, then root. + // + // #002 (FR-008): partial success — a single node's failure must not abort + // the whole rebuild. Collect-and-continue (the hour leaves are already + // re-written above), logging each failure for diagnosis; the rebuild still + // returns the resulting status rather than erroring out wholesale. 
+ let mut failed: Vec = Vec::new(); + let mut propagate = |id: &str, level: NodeLevel| { + let node_id = id.to_string(); + async move { + if let Err(e) = + propagate_node(config, provider, namespace, &node_id, level, model).await + { + log::warn!( + "[tree_summarizer] rebuild propagate failed (continuing) namespace='{namespace}' \ + node={node_id} level={}: {e:#}", + level.as_str() + ); + Some(node_id) + } else { + None + } + } + }; for day_id in &day_ids { - propagate_node(config, provider, namespace, day_id, NodeLevel::Day, model).await?; + if let Some(f) = propagate(day_id, NodeLevel::Day).await { + failed.push(f); + } } for month_id in &month_ids { - propagate_node( - config, - provider, - namespace, - month_id, - NodeLevel::Month, - model, - ) - .await?; + if let Some(f) = propagate(month_id, NodeLevel::Month).await { + failed.push(f); + } } for year_id in &year_ids { - propagate_node(config, provider, namespace, year_id, NodeLevel::Year, model).await?; + if let Some(f) = propagate(year_id, NodeLevel::Year).await { + failed.push(f); + } + } + if let Some(f) = propagate("root", NodeLevel::Root).await { + failed.push(f); + } + if !failed.is_empty() { + log::warn!( + "[tree_summarizer] partial rebuild for '{namespace}': {} node(s) failed ({:?})", + failed.len(), + failed + ); } - propagate_node(config, provider, namespace, "root", NodeLevel::Root, model).await?; let final_status = store::get_tree_status(config, namespace)?; @@ -529,7 +587,7 @@ fn collect_hour_leaves_recursive( /// /// This should be called once at application startup. The task runs /// indefinitely, sleeping until the next hour boundary. 
-pub async fn run_hourly_loop(config: Config, provider: Box) { +pub async fn run_hourly_loop(config: Config, provider: Box, model: String) { tracing::debug!("[tree_summarizer] hourly loop started"); loop { @@ -557,7 +615,7 @@ pub async fn run_hourly_loop(config: Config, provider: Box) { let ts = Utc::now(); let namespaces = discover_active_namespaces(&config); for ns in &namespaces { - match run_summarization(&config, provider.as_ref(), ns, ts).await { + match run_summarization(&config, provider.as_ref(), &model, ns, ts).await { Ok(Some(node)) => { tracing::debug!( "[tree_summarizer] hourly job completed for '{}': node {} ({} tokens)", diff --git a/src/openhuman/memory_tree/tree_runtime/engine_tests.rs b/src/openhuman/memory_tree/tree_runtime/engine_tests.rs index 3f43ba0489..d42614349c 100644 --- a/src/openhuman/memory_tree/tree_runtime/engine_tests.rs +++ b/src/openhuman/memory_tree/tree_runtime/engine_tests.rs @@ -43,6 +43,32 @@ impl Provider for StubProvider { } } +/// #002 (FR-008): a Provider that errors only when the system prompt names a +/// specific summarization level (`level_name` is embedded by +/// `summarize_to_limit`), succeeding otherwise. Lets a test force a single +/// propagation node to fail while the rest of the run proceeds. 
+struct FailAtLevelProvider { + fail_level: &'static str, + reply: String, +} + +#[async_trait] +impl Provider for FailAtLevelProvider { + async fn chat_with_system( + &self, + system: Option<&str>, + _message: &str, + _model: &str, + _temperature: f64, + ) -> anyhow::Result { + let sys = system.unwrap_or(""); + if sys.contains(&format!("at the {} level", self.fail_level)) { + anyhow::bail!("simulated {} summarization failure", self.fail_level); + } + Ok(self.reply.clone()) + } +} + // ── group_by_hour ──────────────────────────────────────────────────────── #[test] @@ -345,7 +371,7 @@ async fn run_summarization_empty_buffer_returns_none() { std::fs::create_dir_all(&cfg.workspace_dir).unwrap(); let provider = StubProvider::with_reply("should not be called"); let ts = Utc::now(); - let result = run_summarization(&cfg, &provider, "test-ns", ts) + let result = run_summarization(&cfg, &provider, "test-model", "test-ns", ts) .await .unwrap(); assert!(result.is_none(), "empty buffer must return None"); @@ -364,7 +390,9 @@ async fn run_summarization_drains_buffer_and_writes_hour_node() { store::buffer_write(&cfg, ns, "entry two", &ts, None).unwrap(); let provider = StubProvider::with_reply("hour leaf summary from LLM"); - let last_node = run_summarization(&cfg, &provider, ns, ts).await.unwrap(); + let last_node = run_summarization(&cfg, &provider, "test-model", ns, ts) + .await + .unwrap(); let node = last_node.expect("non-empty buffer must return an hour node"); log::debug!( @@ -398,7 +426,9 @@ async fn run_summarization_builds_ancestor_chain() { store::buffer_write(&cfg, ns, "test content", &ts, None).unwrap(); let provider = StubProvider::with_reply("summary text"); - run_summarization(&cfg, &provider, ns, ts).await.unwrap(); + run_summarization(&cfg, &provider, "test-model", ns, ts) + .await + .unwrap(); // Day, month, year, and root must all be present. 
assert!( @@ -434,7 +464,7 @@ async fn run_summarization_multi_hour_groups_produce_multiple_hour_leaves() { store::buffer_write(&cfg, ns, "afternoon entry", &ts_h14, None).unwrap(); let provider = StubProvider::with_reply("grouped summary"); - run_summarization(&cfg, &provider, ns, ts_h14) + run_summarization(&cfg, &provider, "test-model", ns, ts_h14) .await .unwrap(); @@ -509,7 +539,9 @@ async fn rebuild_tree_restores_buffer_and_rewrites_ancestors() { store::buffer_write(&cfg, ns, "pending buffer item", &ts, None).unwrap(); let provider = StubProvider::with_reply("rebuilt summary"); - let status = rebuild_tree(&cfg, &provider, ns).await.unwrap(); + let status = rebuild_tree(&cfg, &provider, "test-model", ns) + .await + .unwrap(); assert!(status.total_nodes >= 5, "expected leaf + ancestor chain"); let restored_buffer = store::buffer_read(&cfg, ns).unwrap(); @@ -532,6 +564,59 @@ async fn rebuild_tree_restores_buffer_and_rewrites_ancestors() { ); } +#[tokio::test] +async fn rebuild_tree_partial_success_when_one_level_fails() { + // #002 (FR-008): a single propagation node failing must NOT abort the + // whole rebuild. Seed two LARGE hour leaves so the day-level combine + // exceeds the 2000-token Day budget and forces an LLM call there; a + // provider that fails only at the "day" level makes that one node fail. + // The rebuild must still return Ok and the hour leaves must survive. + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + std::fs::create_dir_all(&cfg.workspace_dir).unwrap(); + let ns = "partial-rebuild"; + let ts = Utc.with_ymd_and_hms(2024, 3, 15, 10, 0, 0).unwrap(); + + // ~5000 chars each (~1250 tokens) → combined ~2500 tokens > Day budget 2000. 
+ let big = "word ".repeat(1000); + let make_hour = |id: &str| TreeNode { + node_id: id.to_string(), + namespace: ns.to_string(), + level: NodeLevel::Hour, + parent_id: derive_parent_id(id), + summary: big.clone(), + token_count: estimate_tokens(&big), + child_count: 0, + created_at: ts, + updated_at: ts, + metadata: None, + }; + store::write_node(&cfg, &make_hour("2024/03/15/10")).unwrap(); + store::write_node(&cfg, &make_hour("2024/03/15/11")).unwrap(); + + let provider = FailAtLevelProvider { + fail_level: "day", + reply: "ok summary".to_string(), + }; + + // Must NOT error despite the day-level summarization failing. + let status = rebuild_tree(&cfg, &provider, "test-model", ns) + .await + .expect("partial failure must not abort the rebuild"); + + // The hour leaves (written before propagation) survive. + assert!( + status.total_nodes >= 2, + "hour leaves must survive a partial rebuild" + ); + assert!(store::read_node(&cfg, ns, "2024/03/15/10") + .unwrap() + .is_some()); + assert!(store::read_node(&cfg, ns, "2024/03/15/11") + .unwrap() + .is_some()); +} + #[tokio::test] async fn rebuild_tree_on_empty_namespace_is_noop() { let tmp = TempDir::new().unwrap(); @@ -539,7 +624,7 @@ async fn rebuild_tree_on_empty_namespace_is_noop() { std::fs::create_dir_all(&cfg.workspace_dir).unwrap(); let provider = StubProvider::with_reply("unused"); - let status = rebuild_tree(&cfg, &provider, "empty-rebuild") + let status = rebuild_tree(&cfg, &provider, "test-model", "empty-rebuild") .await .unwrap(); assert_eq!(status.total_nodes, 0); diff --git a/src/openhuman/memory_tree/tree_runtime/ops.rs b/src/openhuman/memory_tree/tree_runtime/ops.rs index 204246018a..82d3adf6a6 100644 --- a/src/openhuman/memory_tree/tree_runtime/ops.rs +++ b/src/openhuman/memory_tree/tree_runtime/ops.rs @@ -44,10 +44,10 @@ pub async fn tree_summarizer_run( ) -> Result, String> { store::validate_namespace(namespace)?; - let provider = create_provider(config)?; + let (provider, model) = 
create_provider(config)?; let ts = Utc::now(); - match engine::run_summarization(config, provider.as_ref(), namespace.trim(), ts).await { + match engine::run_summarization(config, provider.as_ref(), &model, namespace.trim(), ts).await { Ok(Some(node)) => Ok(RpcOutcome::single_log( serde_json::to_value(&node).map_err(|e| e.to_string())?, format!( @@ -126,9 +126,9 @@ pub async fn tree_summarizer_rebuild( ) -> Result, String> { store::validate_namespace(namespace)?; - let provider = create_provider(config)?; + let (provider, model) = create_provider(config)?; - let status = engine::rebuild_tree(config, provider.as_ref(), namespace.trim()) + let status = engine::rebuild_tree(config, provider.as_ref(), &model, namespace.trim()) .await .map_err(|e| format!("rebuild failed: {e:#}"))?; @@ -144,15 +144,81 @@ pub async fn tree_summarizer_rebuild( // ── Helper ───────────────────────────────────────────────────────────── +/// Build the (provider, model) pair the summarizer runs on (#002 FR-007). +/// +/// Historically this hard-required local AI ("private + offline"), which left +/// "Build Summary Trees" dead for cloud-only setups (Tencent/OpenRouter with +/// no local Ollama). It now falls back to the **configured cloud chat +/// provider** for the summarization role when local AI is off, returning that +/// provider's model id alongside it so the engine targets the right model +/// (the engine no longer assumes the local model id). The cloud fallback +/// is opt-in only; the provider is resolved in the order below. +/// +/// Priority: +/// 1. Local Ollama when `local_ai.runtime_enabled = true`. +/// 2. Cloud via `create_chat_provider` when +/// `memory_tree.cloud_summarization_opt_in = true` — the user has +/// explicitly acknowledged that memory summaries will be sent to an +/// external provider. +/// 3. Error otherwise — "Build Summary Trees" is local-only by default; +/// the user must opt in to cloud summarization in Settings → AI → Memory.
fn create_provider( config: &Config, -) -> Result, String> { - // Tree summarization runs exclusively on local AI to keep memory - // processing private and offline — no backend calls. - if !config.local_ai.runtime_enabled { - return Err("tree summarizer requires local_ai to be enabled in config".to_string()); +) -> Result< + ( + Box, + String, + ), + String, +> { + if config.local_ai.runtime_enabled { + // Local path: Ollama + the user's local chat model. + let provider = create_local_ai_provider(config)?; + return Ok((provider, config.local_ai.chat_model_id.clone())); + } + + if !config.memory_tree.cloud_summarization_opt_in { + return Err("no summarization provider — enable local AI, or enable \ + cloud summarization in Settings → AI → Memory" + .to_string()); + } + + // Cloud path — user has explicitly opted in. Build the configured + // provider for the summarization role (`memory_provider` hint). + crate::openhuman::inference::provider::factory::create_chat_provider("summarization", config) + .map_err(|e| format!("tree summarizer: failed to build cloud provider: {e:#}")) +} + +/// Whether a summarization provider can be resolved for "Build Summary Trees" +/// under the current config — the single source of truth the memory doctor +/// reuses so its `summary_tree` stage matches the runtime path (#002 FR-007). +/// +/// Routes through [`create_provider`] (the SAME resolver the runtime uses): +/// - local AI enabled ⇒ available (local Ollama path). +/// - local AI off + `memory_tree.cloud_summarization_opt_in = true` ⇒ +/// available iff the configured summarization-role provider resolves. +/// - local AI off + opt-in `false` (default) ⇒ unavailable — explicit +/// consent required before routing workspace memory summaries to a cloud +/// provider. Enable in Settings → AI → Memory. +/// +/// The provider built for the `Ok` check is dropped — construction is cheap +/// (no network) and confirming by build beats guessing. 
+pub fn summarizer_available(config: &Config) -> (bool, &'static str) { + let local = config.local_ai.runtime_enabled; + match create_provider(config) { + Ok(_) if local => ( + true, + "local AI enabled — Build Summary Trees runs on the local model", + ), + Ok(_) => ( + true, + "local AI off — Build Summary Trees runs on the configured cloud provider", + ), + Err(_) => ( + false, + "no summarization provider available — enable local AI or configure a cloud provider in Settings → AI", + ), } - create_local_ai_provider(config) } /// Create a provider backed by the local Ollama instance for summarization, @@ -230,14 +296,45 @@ mod tests { } #[test] - fn create_provider_requires_local_ai_runtime() { + fn create_provider_uses_local_model_when_local_ai_enabled() { + // #002 FR-007: local path returns the user's local chat model. + let mut cfg = Config::default(); + cfg.local_ai.runtime_enabled = true; + cfg.local_ai.chat_model_id = "qwen2.5:7b".to_string(); + let (_provider, model) = create_provider(&cfg).expect("local provider should build"); + assert_eq!(model, "qwen2.5:7b"); + } + + #[test] + fn create_provider_errors_without_cloud_opt_in() { + // By default, cloud summarization is off — memory summaries are + // sensitive, so an explicit opt-in is required before routing them to + // an external provider. 
let mut cfg = Config::default(); cfg.local_ai.runtime_enabled = false; - let err = match create_provider(&cfg) { - Ok(_) => panic!("runtime-disabled config should fail"), - Err(err) => err, - }; - assert!(err.contains("requires local_ai to be enabled")); + // cloud_summarization_opt_in defaults to false + match create_provider(&cfg) { + Err(e) => assert!( + e.contains("no summarization provider"), + "unexpected error: {e}" + ), + Ok(_) => panic!("expected error without cloud opt-in"), + } + } + + #[test] + fn create_provider_uses_cloud_when_opted_in_and_local_ai_off() { + // #002 FR-007: with explicit opt-in Build Summary Trees uses the + // configured cloud provider when local AI is disabled. + let mut cfg = Config::default(); + cfg.local_ai.runtime_enabled = false; + cfg.memory_tree.cloud_summarization_opt_in = true; + let (_provider, model) = + create_provider(&cfg).expect("cloud fallback should build when opted in"); + assert!( + !model.trim().is_empty(), + "cloud fallback must resolve a model" + ); } #[test] @@ -403,18 +500,26 @@ mod tests { } #[tokio::test] - async fn tree_summarizer_run_and_rebuild_require_local_ai() { + async fn tree_summarizer_run_skips_cleanly_with_cloud_fallback_and_empty_buffer() { + // #002 FR-007 (Gray review updated): with local AI off AND explicit cloud + // opt-in, run/rebuild do not hard-error on the provider precondition. + // With an empty buffer, `run` reports the normal "no buffered data" skip. 
let (_tmp, mut cfg) = config_in_tempdir(); cfg.local_ai.runtime_enabled = false; + cfg.memory_tree.cloud_summarization_opt_in = true; - let run_err = tree_summarizer_run(&cfg, "team") + let outcome = tree_summarizer_run(&cfg, "team") .await - .expect_err("run should require local ai"); - assert!(run_err.contains("requires local_ai to be enabled")); + .expect("run should not error on the provider precondition when opted in"); + assert_eq!( + outcome.value, + json!({ "skipped": true, "reason": "no buffered data" }) + ); - let rebuild_err = tree_summarizer_rebuild(&cfg, "team") + // Rebuild on an empty tree returns the (zero-node) status, not an error. + let rebuilt = tree_summarizer_rebuild(&cfg, "team") .await - .expect_err("rebuild should require local ai"); - assert!(rebuild_err.contains("requires local_ai to be enabled")); + .expect("rebuild should not error on the provider precondition when opted in"); + assert_eq!(rebuilt.value["total_nodes"], 0); } } diff --git a/src/openhuman/tools/ops.rs b/src/openhuman/tools/ops.rs index 5b15828d6b..68483f9ade 100644 --- a/src/openhuman/tools/ops.rs +++ b/src/openhuman/tools/ops.rs @@ -199,6 +199,9 @@ pub fn all_tools_with_runtime( Box::new(MemoryStoreTool::new(memory.clone(), security.clone())), Box::new(MemoryRecallTool::new(memory.clone())), Box::new(MemoryForgetTool::new(memory.clone(), security.clone())), + // #002: read-only self-diagnosis of the memory pipeline so the agent + // can explain an empty/stalled wiki + the fix. 
+ Box::new(MemoryDoctorTool::new(config.clone())), Box::new(MemoryQueryTool), Box::new(MemoryQueryWalkTool), Box::new(SmartMemoryWalkTool), diff --git a/tests/memory_threads_raw_coverage_e2e.rs b/tests/memory_threads_raw_coverage_e2e.rs index 2bc3aec85e..681e8eccd9 100644 --- a/tests/memory_threads_raw_coverage_e2e.rs +++ b/tests/memory_threads_raw_coverage_e2e.rs @@ -4614,14 +4614,16 @@ async fn tree_summarizer_ops_cover_validation_query_and_local_provider_guards() ) .await .unwrap_err(); - assert!(provider_guard.contains("local_ai")); + // No local AI + cloud-summarization opt-in defaults off ⇒ the guard names the + // local-AI remediation in user-facing prose ("enable local AI ..."). + assert!(provider_guard.contains("local AI")); let rebuild_guard = openhuman_core::openhuman::memory_tree::tree_runtime::ops::tree_summarizer_rebuild( &config, "ops_ns", ) .await .unwrap_err(); - assert!(rebuild_guard.contains("local_ai")); + assert!(rebuild_guard.contains("local AI")); } #[tokio::test] diff --git a/tests/memory_tree_memory_round23_raw_coverage_e2e.rs b/tests/memory_tree_memory_round23_raw_coverage_e2e.rs index cbe21f4150..c4fd639b8d 100644 --- a/tests/memory_tree_memory_round23_raw_coverage_e2e.rs +++ b/tests/memory_tree_memory_round23_raw_coverage_e2e.rs @@ -221,7 +221,7 @@ async fn tree_runtime_engine_summarizes_preserves_buffer_and_rebuilds() { ) .expect("buffer second entry"); - let hour = engine::run_summarization(&config, &provider, namespace, ts) + let hour = engine::run_summarization(&config, &provider, "test-model", namespace, ts) .await .expect("run summarization") .expect("hour node"); @@ -239,10 +239,12 @@ async fn tree_runtime_engine_summarizes_preserves_buffer_and_rebuilds() { assert!(node.summary.contains("round23 summary") || node.summary.contains("##")); } - assert!(engine::run_summarization(&config, &provider, namespace, ts) - .await - .expect("empty run") - .is_none()); + assert!( + engine::run_summarization(&config, &provider, "test-model", 
namespace, ts) + .await + .expect("empty run") + .is_none() + ); tree_runtime_store::buffer_write( &config, @@ -253,7 +255,7 @@ async fn tree_runtime_engine_summarizes_preserves_buffer_and_rebuilds() { ) .expect("buffer pending entry"); - let status = engine::rebuild_tree(&config, &provider, namespace) + let status = engine::rebuild_tree(&config, &provider, "test-model", namespace) .await .expect("rebuild tree"); assert!(status.total_nodes >= 5); diff --git a/tests/memory_tree_summarizer_e2e.rs b/tests/memory_tree_summarizer_e2e.rs index 1a7e9176cd..640aaee344 100644 --- a/tests/memory_tree_summarizer_e2e.rs +++ b/tests/memory_tree_summarizer_e2e.rs @@ -233,7 +233,7 @@ async fn builds_hour_day_month_year_chain() { ]); log::debug!("[memory_tree_summarizer_e2e] running summarization"); - let result = engine::run_summarization(&config, &provider, NS, Utc::now()).await; + let result = engine::run_summarization(&config, &provider, "test-model", NS, Utc::now()).await; log::debug!( "[memory_tree_summarizer_e2e] run_summarization returned: {:?}", @@ -379,7 +379,7 @@ async fn merges_into_existing_hour_node() { )]); log::debug!("[memory_tree_summarizer_e2e] first run"); - let r1 = engine::run_summarization(&config, &provider1, NS, Utc::now()) + let r1 = engine::run_summarization(&config, &provider1, "test-model", NS, Utc::now()) .await .expect("first run_summarization"); assert!(r1.is_some(), "first run should yield a node"); @@ -415,7 +415,7 @@ async fn merges_into_existing_hour_node() { )]); log::debug!("[memory_tree_summarizer_e2e] second run (same hour)"); - let r2 = engine::run_summarization(&config, &provider2, NS, Utc::now()) + let r2 = engine::run_summarization(&config, &provider2, "test-model", NS, Utc::now()) .await .expect("second run_summarization"); assert!(r2.is_some(), "second run should yield a node"); @@ -504,7 +504,7 @@ async fn survives_llm_error_with_partial_progress() { ]); log::debug!("[memory_tree_summarizer_e2e] running summarization expecting partial 
failure"); - let result = engine::run_summarization(&config, &provider, NS, Utc::now()).await; + let result = engine::run_summarization(&config, &provider, "test-model", NS, Utc::now()).await; log::debug!( "[memory_tree_summarizer_e2e] run_summarization result: is_ok={}", diff --git a/tests/memory_tree_sync_raw_coverage_e2e.rs b/tests/memory_tree_sync_raw_coverage_e2e.rs index e99eb678ef..bd31d86669 100644 --- a/tests/memory_tree_sync_raw_coverage_e2e.rs +++ b/tests/memory_tree_sync_raw_coverage_e2e.rs @@ -241,7 +241,7 @@ async fn tree_runtime_engine_rpc_and_walk_cover_success_and_edge_paths() { "rebuilt hour 10", "rebuilt hour 11", ]); - let last = engine::run_summarization(&cfg, &provider, ns, Utc::now()) + let last = engine::run_summarization(&cfg, &provider, "test-model", ns, Utc::now()) .await .expect("run summarization") .expect("last hour node"); @@ -275,7 +275,7 @@ async fn tree_runtime_engine_rpc_and_walk_cover_success_and_edge_paths() { "rebuilt year summary", "rebuilt root summary", ]); - let rebuilt = engine::rebuild_tree(&cfg, &rebuild_provider, ns) + let rebuilt = engine::rebuild_tree(&cfg, &rebuild_provider, "test-model", ns) .await .expect("rebuild tree"); assert_eq!(rebuilt.total_nodes, 6);