From feeb901e5727880c9e45f1b0b0be0986e2e123e4 Mon Sep 17 00:00:00 2001 From: Paco POR-CORREO Date: Mon, 6 Apr 2026 19:34:31 +0200 Subject: [PATCH] Add persistent evaluation logs for RAG testing --- RAG/docs/API_RAG.md | 45 +++++++++ RAG/docs/PLAYGROUND.md | 15 +++ RAG/public/playground/app.js | 60 ++++++++++++ RAG/public/playground/index.html | 13 +++ RAG/src/app.ts | 127 +++++++++++++++++++++++++- RAG/src/config/env.ts | 1 + RAG/src/modules/logs/service.ts | 101 ++++++++++++++++++++ RAG/src/modules/vectorstore/client.ts | 2 +- RAG/src/shared/types/rag.ts | 36 ++++++++ docs/HISTORIAL_SESIONES.md | 1 + 10 files changed, 398 insertions(+), 3 deletions(-) create mode 100644 RAG/src/modules/logs/service.ts diff --git a/RAG/docs/API_RAG.md b/RAG/docs/API_RAG.md index 2560d05..437b35f 100644 --- a/RAG/docs/API_RAG.md +++ b/RAG/docs/API_RAG.md @@ -407,6 +407,51 @@ Campos esperados del formulario: Si se usa `sourceId`, el archivo subido no se mezcla con otros scopes salvo que elijas reutilizar ese mismo identificador. +--- + +### 8. `GET /logs/recent` + +Devuelve los logs recientes de evaluacion guardados por el sistema. + +Sirve para revisar: +- consultas con contexto insuficiente +- respuestas problemáticas +- logs manuales marcados por el usuario + +Ejemplo: + +```bash +curl -sS "https://rag.por-correo.com/logs/recent" +``` + +--- + +### 9. `POST /logs/manual` + +Permite registrar manualmente una consulta o respuesta que quieras revisar despues. + +Es util cuando: +- la respuesta no te convence +- detectas una carencia del RAG +- quieres dejar una nota humana asociada a una consulta + +Payload base: + +```json +{ + "operation": "answer", + "reason": "manual_review_requested", + "query": "prueba de log manual", + "mode": "documental", + "intent": "specific", + "model": "openai/gpt-4.1-mini", + "note": "la respuesta parece demasiado generica", + "scope": { + "sourceRef": "/home/pancho/Documentos/Empresa/Desarrollo/IA/docs" + } +} +``` + Respuesta esperada resumida: ```json diff --git a/RAG/docs/PLAYGROUND.md b/RAG/docs/PLAYGROUND.md index a1d8542..fe77e2e 100644 --- a/RAG/docs/PLAYGROUND.md +++ b/RAG/docs/PLAYGROUND.md @@ -138,6 +138,21 @@ Eso significa: --- +## Logs de evaluacion + +El playground ya soporta dos vias de logging: + +1. `log automatico` +- cuando la respuesta o el contexto indican insuficiencia relevante + +2. `log manual` +- cuando el usuario pulsa el boton para registrar la consulta actual +- puede añadir una nota explicativa propia + +Los logs quedan guardados en `Qdrant`, por lo que no dependen del filesystem efimero del contenedor. + +--- + ## Idea de uso Este playground no sustituye a clientes finales ni al futuro MCP. diff --git a/RAG/public/playground/app.js b/RAG/public/playground/app.js index 67fab2d..0afcd90 100644 --- a/RAG/public/playground/app.js +++ b/RAG/public/playground/app.js @@ -5,6 +5,7 @@ const replaceBootstrapButton = document.getElementById("replaceBootstrapButton") const clearBootstrapButton = document.getElementById("clearBootstrapButton"); const sendChatButton = document.getElementById("sendChatButton"); const clearChatButton = document.getElementById("clearChatButton"); +const manualLogButton = document.getElementById("manualLogButton"); const presetDocs = document.getElementById("presetDocs"); const presetRagDocs = document.getElementById("presetRagDocs"); const presetCode = document.getElementById("presetCode"); @@ -19,6 +20,7 @@ const chatMessages = document.getElementById("chatMessages"); const contextIndicator = document.getElementById("contextIndicator"); const contextStatusText = document.getElementById("contextStatusText"); const contextScopeText = document.getElementById("contextScopeText"); +const logsResult = document.getElementById("logsResult"); const ingestSourceType = document.getElementById("ingestSourceType"); const ingestScopeMode = document.getElementById("ingestScopeMode"); @@ -45,10 +47,12 @@ const compareWithoutRag = document.getElementById("compareWithoutRag"); const chatMode = document.getElementById("chatMode"); const chatScopeInfo = document.getElementById("chatScopeInfo"); const chatInput = document.getElementById("chatInput"); +const manualLogNote = document.getElementById("manualLogNote"); let lastBootstrapContext = null; let chatHistory = []; let availableScopes = []; +let lastInteraction = null; function format(value) { return JSON.stringify(value, null, 2); @@ -221,6 +225,15 @@ async function loadAnswerModels() { } } +async function loadRecentLogs() { + try { + const logs = await fetch("/logs/recent").then((response) => response.json()); + logsResult.textContent = format(logs); + } catch (error) { + logsResult.textContent = String(error); + } +} + document.querySelectorAll(".tab-button").forEach((button) => { button.addEventListener("click", () => { document.querySelectorAll(".tab-button").forEach((entry) => entry.classList.remove("active")); @@ -287,6 +300,7 @@ ingestButton.addEventListener("click", async () => { ingestResult.textContent = format(data); await loadScopes(); + await loadRecentLogs(); updateIngestUiState(); } catch (error) { ingestResult.textContent = String(error); @@ -308,6 +322,19 @@ async function executeBootstrap() { lastBootstrapContext = data; bootstrapResult.textContent = format(data); renderBootstrapContext(); + lastInteraction = { + operation: "retrieve", + query: bootstrapQuery.value, + mode: bootstrapMode.value, + intent: "bootstrap", + model: answerModel.value, + scope: buildScopeFromInputs(), + usedBootstrapContext: false, + usedAdditionalRetrieve: useModelInRetrieve.checked, + responseSummary: data.modelSummary || data.summary, + retrievedItems: data.items || [] + }; + await loadRecentLogs(); } catch (error) { bootstrapResult.textContent = String(error); } @@ -348,6 +375,19 @@ sendChatButton.addEventListener("click", async () => { chatHistory.push({ role: "assistant", content: response.answer }); renderChatHistory(); mainResult.textContent = format(response); + lastInteraction = { + operation: "chat", + query: message, + mode: chatMode.value, + intent: "specific", + model: answerModel.value, + scope: buildScopeFromInputs(), + usedBootstrapContext: response.usedBootstrapContext, + usedAdditionalRetrieve: response.usedAdditionalRetrieve, + responseSummary: response.answer, + retrievedItems: response.retrieved?.items || [] + }; + await loadRecentLogs(); if (compareWithoutRag.checked) { const comparison = await request("/answer/direct", { @@ -374,6 +414,25 @@ clearChatButton.addEventListener("click", () => { compareResult.textContent = "Desactivada."; }); +manualLogButton.addEventListener("click", async () => { + if (!lastInteraction) { + mainResult.textContent = "No hay una consulta previa para registrar en logs."; + return; + } + + try { + const entry = await request("/logs/manual", { + ...lastInteraction, + reason: "manual_review_requested", + note: manualLogNote.value.trim() || undefined + }); + logsResult.textContent = format(entry); + await loadRecentLogs(); + } catch (error) { + logsResult.textContent = String(error); + } +}); + presetDocs.addEventListener("click", () => { applyPreset( "documental", @@ -403,6 +462,7 @@ presetCode.addEventListener("click", () => { loadScopes(); loadAnswerModels(); +loadRecentLogs(); renderBootstrapContext(); renderChatHistory(); updateIngestUiState(); diff --git a/RAG/public/playground/index.html b/RAG/public/playground/index.html index 43851a8..4780b1c 100644 --- a/RAG/public/playground/index.html +++ b/RAG/public/playground/index.html @@ -175,6 +175,14 @@ + + +
+ +
+

Ultima respuesta estructurada

Sin ejecutar aun.
@@ -192,6 +200,11 @@

Estado / health

Sin comprobar.
+ +
+

Logs recientes

+
Sin cargar aun.
+
diff --git a/RAG/src/app.ts b/RAG/src/app.ts index d0fada5..049b548 100644 --- a/RAG/src/app.ts +++ b/RAG/src/app.ts @@ -9,6 +9,7 @@ import { env } from "./config/env.js"; import { AnswerService } from "./modules/answer/service.js"; import { IngestService } from "./modules/ingest/service.js"; import { OpenRouterEmbeddingProvider } from "./modules/embeddings/provider.js"; +import { EvaluationLogService } from "./modules/logs/service.js"; import { documentalChunkingPolicy } from "./modules/process/chunking.js"; import { RetrieveService } from "./modules/retrieve/service.js"; import { supportedParserExtensions } from "./modules/parsers/parser-registry.js"; @@ -27,10 +28,20 @@ export function createApp() { const upload = multer({ storage: multer.memoryStorage() }); const embeddingProvider = new OpenRouterEmbeddingProvider(); const vectorStore = new QdrantVectorStoreClient(); + const evaluationLogs = new EvaluationLogService(embeddingProvider); const ingestService = new IngestService(embeddingProvider, vectorStore); const retrieveService = new RetrieveService(embeddingProvider, vectorStore); const answerService = new AnswerService(retrieveService); + function needsContextLog(summary?: string, itemsCount = 0, answer?: string) { + if (itemsCount === 0) { + return true; + } + + const text = `${summary ?? ""} ${answer ?? ""}`; + return /no se recupero contexto|no hay informacion suficiente|no dispongo de mas detalles|contexto insuficiente/i.test(text); + } + app.use(express.json({ limit: "5mb" })); app.use(express.static(publicDir)); @@ -79,6 +90,16 @@ export function createApp() { } }); + app.get("/logs/recent", async (req, res) => { + try { + const limit = req.query.limit ? Number(req.query.limit) : 20; + const logs = await evaluationLogs.listRecent(limit); + res.json(logs); + } catch (error) { + res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown logs error" }); + } + }); + app.post("/ingest", async (req, res) => { try { const result = await ingestService.ingest(req.body); @@ -142,15 +163,48 @@ export function createApp() { } : undefined; const result = await retrieveService.retrieve(mode, intent, query, scope); + const items = result.items; if (useModel) { const modelSummary = await answerService.summarizeRetrieve(query, result, model); - res.json({ + const payload = { ...result, model: modelSummary.model, modelSummary: modelSummary.summary - }); + }; + + if (needsContextLog(payload.modelSummary, items.length)) { + await evaluationLogs.log({ + trigger: "automatic", + operation: "retrieve", + reason: "retrieve_context_insufficient", + query, + mode, + intent, + scope, + model: payload.model, + responseSummary: payload.modelSummary, + retrievedItems: items + }); + } + + res.json(payload); return; } + + if (needsContextLog(result.summary, items.length)) { + await evaluationLogs.log({ + trigger: "automatic", + operation: "retrieve", + reason: "retrieve_context_insufficient", + query, + mode, + intent, + scope, + responseSummary: result.summary, + retrievedItems: items + }); + } + res.json(result); } catch (error) { res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown retrieve error" }); @@ -172,6 +226,34 @@ export function createApp() { const model = req.body.model ? String(req.body.model) : undefined; const preloadedContext = req.body.preloadedContext ? String(req.body.preloadedContext) : undefined; const result = await answerService.answer(mode, intent, query, scope, model, preloadedContext); + + if (needsContextLog(result.summary, result.citations.length, result.answer)) { + await evaluationLogs.log({ + trigger: "automatic", + operation: "answer", + reason: "answer_context_insufficient", + query, + mode, + intent, + scope, + model: result.model, + note: preloadedContext ? "bootstrap_context_present" : undefined, + usedBootstrapContext: Boolean(preloadedContext), + responseSummary: result.answer, + retrievedItems: result.citations.map((citation) => ({ + chunkId: citation.chunkId, + documentId: citation.documentId, + sourceId: scope?.sourceId ?? "", + title: citation.title, + sectionTitle: citation.sectionTitle, + content: "", + score: 0, + startLine: citation.startLine, + endLine: citation.endLine + })) + }); + } + res.json(result); } catch (error) { res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown answer error" }); @@ -224,11 +306,52 @@ export function createApp() { allowAdditionalRetrieve }); + if (needsContextLog(result.retrieved?.summary, result.retrieved?.items.length ?? 0, result.answer)) { + await evaluationLogs.log({ + trigger: "automatic", + operation: "chat", + reason: "chat_context_insufficient", + query: message, + mode, + intent: "specific", + scope, + model: result.model, + usedBootstrapContext: result.usedBootstrapContext, + usedAdditionalRetrieve: result.usedAdditionalRetrieve, + responseSummary: result.answer, + retrievedItems: result.retrieved?.items ?? [] + }); + } + res.json(result); } catch (error) { res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown chat error" }); } }); + app.post("/logs/manual", async (req, res) => { + try { + const query = String(req.body.query ?? ""); + const entry = await evaluationLogs.log({ + trigger: "manual", + operation: req.body.operation === "chat" || req.body.operation === "retrieve" ? req.body.operation : "answer", + reason: req.body.reason ? String(req.body.reason) : "manual_review_requested", + query, + mode: req.body.mode, + intent: req.body.intent, + scope: req.body.scope, + model: req.body.model, + note: req.body.note ? String(req.body.note) : undefined, + usedBootstrapContext: Boolean(req.body.usedBootstrapContext), + usedAdditionalRetrieve: Boolean(req.body.usedAdditionalRetrieve), + responseSummary: req.body.responseSummary ? String(req.body.responseSummary) : undefined, + retrievedItems: Array.isArray(req.body.retrievedItems) ? req.body.retrievedItems : [] + }); + res.status(201).json(entry); + } catch (error) { + res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown manual log error" }); + } + }); + return app; } diff --git a/RAG/src/config/env.ts b/RAG/src/config/env.ts index dd282a7..7720e24 100644 --- a/RAG/src/config/env.ts +++ b/RAG/src/config/env.ts @@ -17,6 +17,7 @@ export const env = { qdrantUrl: requireEnv("QDRANT_URL", "http://localhost:6333"), qdrantApiKey: process.env.QDRANT_API_KEY ?? "", qdrantCollection: requireEnv("QDRANT_COLLECTION", "rag_chunks"), + qdrantLogsCollection: requireEnv("QDRANT_LOGS_COLLECTION", "rag_eval_logs"), embeddingProvider: requireEnv("EMBEDDING_PROVIDER", "openrouter"), embeddingModel: requireEnv("EMBEDDING_MODEL", "qwen/qwen3-embedding-8b"), embeddingBaseUrl: requireEnv("EMBEDDING_BASE_URL", "https://openrouter.ai/api/v1"), diff --git a/RAG/src/modules/logs/service.ts b/RAG/src/modules/logs/service.ts new file mode 100644 index 0000000..6d85225 --- /dev/null +++ b/RAG/src/modules/logs/service.ts @@ -0,0 +1,101 @@ +import { createHash } from "node:crypto"; +import type { QdrantClient } from "@qdrant/js-client-rest"; +import { env } from "../../config/env.js"; +import type { EmbeddingProvider } from "../embeddings/provider.js"; +import { buildQdrantClient } from "../vectorstore/client.js"; +import type { EvaluationLogEntry, EvaluationLogInput } from "../../shared/types/rag.js"; + +function buildLogId(input: EvaluationLogInput): string { + const seed = `${Date.now()}::${input.operation}::${input.query}::${input.reason}`; + const hex = createHash("sha1").update(seed).digest("hex").slice(0, 32); + return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`; +} + +export class EvaluationLogService { + private readonly client: QdrantClient; + private collectionReady = false; + + constructor(private readonly embeddingProvider: EmbeddingProvider) { + this.client = buildQdrantClient(); + } + + async log(input: EvaluationLogInput): Promise { + const [vector] = await this.embeddingProvider.embed([input.query || input.reason]); + await this.ensureCollection(vector.length); + + const entry: EvaluationLogEntry = { + id: buildLogId(input), + trigger: input.trigger, + operation: input.operation, + reason: input.reason, + query: input.query, + mode: input.mode, + intent: input.intent, + model: input.model, + note: input.note, + createdAt: new Date().toISOString(), + scope: input.scope, + usedBootstrapContext: input.usedBootstrapContext, + usedAdditionalRetrieve: input.usedAdditionalRetrieve, + responseSummary: input.responseSummary, + retrievedItemsCount: input.retrievedItems?.length ?? 0, + chunkIds: (input.retrievedItems ?? []).map((item) => item.chunkId), + documentIds: [...new Set((input.retrievedItems ?? []).map((item) => item.documentId))] + }; + + await this.client.upsert(env.qdrantLogsCollection, { + wait: true, + points: [{ + id: entry.id, + vector, + payload: entry as unknown as Record + }] + }); + + return entry; + } + + async listRecent(limit = 20): Promise { + const collections = await this.client.getCollections(); + const exists = collections.collections.some((collection) => collection.name === env.qdrantLogsCollection); + + if (!exists) { + return []; + } + + const response = await this.client.scroll(env.qdrantLogsCollection, { + limit: Math.min(limit * 3, 100), + with_payload: true + }); + + return response.points + .map((point) => point.payload as unknown as EvaluationLogEntry) + .filter(Boolean) + .sort((left, right) => right.createdAt.localeCompare(left.createdAt)) + .slice(0, Math.min(limit, 100)); + } + + private async ensureCollection(vectorSize: number): Promise { + if (this.collectionReady) { + return; + } + + const collections = await this.client.getCollections(); + const exists = collections.collections.some((collection) => collection.name === env.qdrantLogsCollection); + + if (!exists) { + await this.client.createCollection(env.qdrantLogsCollection, { + vectors: { + size: vectorSize, + distance: "Cosine" + } + }); + await this.client.createPayloadIndex(env.qdrantLogsCollection, { + field_name: "createdAt", + field_schema: "keyword" + }).catch(() => undefined); + } + + this.collectionReady = true; + } +} diff --git a/RAG/src/modules/vectorstore/client.ts b/RAG/src/modules/vectorstore/client.ts index 558b80d..25d0bec 100644 --- a/RAG/src/modules/vectorstore/client.ts +++ b/RAG/src/modules/vectorstore/client.ts @@ -2,7 +2,7 @@ import { QdrantClient } from "@qdrant/js-client-rest"; import { env } from "../../config/env.js"; import type { AvailableScope, IngestedChunk, RetrieveScope, RetrievedItem } from "../../shared/types/rag.js"; -function buildQdrantClient(): QdrantClient { +export function buildQdrantClient(): QdrantClient { const url = new URL(env.qdrantUrl); return new QdrantClient({ host: url.hostname, diff --git a/RAG/src/shared/types/rag.ts b/RAG/src/shared/types/rag.ts index dea862c..029c9e7 100644 --- a/RAG/src/shared/types/rag.ts +++ b/RAG/src/shared/types/rag.ts @@ -105,3 +105,39 @@ export interface ChatResponse { usedAdditionalRetrieve: boolean; retrieved?: RetrieveResponse; } + +export interface EvaluationLogInput { + trigger: "automatic" | "manual"; + operation: "retrieve" | "answer" | "chat"; + reason: string; + query: string; + mode?: ChunkMode; + intent?: RetrieveIntent; + scope?: RetrieveScope; + model?: string; + note?: string; + usedBootstrapContext?: boolean; + usedAdditionalRetrieve?: boolean; + responseSummary?: string; + retrievedItems?: RetrievedItem[]; +} + +export interface EvaluationLogEntry { + id: string; + trigger: "automatic" | "manual"; + operation: "retrieve" | "answer" | "chat"; + reason: string; + query: string; + mode?: ChunkMode; + intent?: RetrieveIntent; + model?: string; + note?: string; + createdAt: string; + scope?: RetrieveScope; + usedBootstrapContext?: boolean; + usedAdditionalRetrieve?: boolean; + responseSummary?: string; + retrievedItemsCount: number; + chunkIds: string[]; + documentIds: string[]; +} diff --git a/docs/HISTORIAL_SESIONES.md b/docs/HISTORIAL_SESIONES.md index 94edd18..c55e46f 100644 --- a/docs/HISTORIAL_SESIONES.md +++ b/docs/HISTORIAL_SESIONES.md @@ -66,6 +66,7 @@ Este archivo registra agentes y sesiones de trabajo de este workspace. - Ajuste de la API y del playground para hacer visible y seleccionable el modelo de `answer`, evitando dejarlo oculto como una decision fija del backend. - Evolucion del playground a una mecanica mas completa con pestañas `Ingesta / Bootstrap / Chat`, indicador visual de contexto activo y endpoint `/chat` con bootstrap reutilizable y consultas adicionales al RAG durante la conversacion. - Ampliacion de la ingesta y del playground para soportar upload directo de archivos y `sourceId` personalizado, permitiendo aislar documentos ajenos al RAG en scopes separados. +- Implementacion de logs de evaluacion persistentes en `Qdrant`, con disparo automatico por contexto insuficiente y registro manual con nota desde el playground. - Reorganizacion de RAG como modulo raiz independiente con documentacion propia en `RAG/docs/`. - Ajuste del indice documental global para reflejar la separacion entre documentacion global y documentacion por tool. - Creacion de `docs/TASK.md` para descomponer lineas de trabajo amplias en puntos de analisis y acuerdos.