Add persistent evaluation logs for RAG testing

This commit is contained in:
Paco POR-CORREO 2026-04-06 19:34:31 +02:00
parent d4cf3a8a51
commit feeb901e57
10 changed files with 398 additions and 3 deletions

View file

@ -407,6 +407,51 @@ Campos esperados del formulario:
Si se usa `sourceId`, el archivo subido no se mezcla con otros scopes salvo que elijas reutilizar ese mismo identificador.
---
### 8. `GET /logs/recent`
Devuelve los logs recientes de evaluación guardados por el sistema.
Sirve para revisar:
- consultas con contexto insuficiente
- respuestas problemáticas
- logs manuales marcados por el usuario
Ejemplo:
```bash
curl -sS "https://rag.por-correo.com/logs/recent"
```
---
### 9. `POST /logs/manual`
Permite registrar manualmente una consulta o respuesta que quieras revisar después.
Es útil cuando:
- la respuesta no te convence
- detectas una carencia del RAG
- quieres dejar una nota humana asociada a una consulta
Payload base:
```json
{
"operation": "answer",
"reason": "manual_review_requested",
"query": "prueba de log manual",
"mode": "documental",
"intent": "specific",
"model": "openai/gpt-4.1-mini",
"note": "la respuesta parece demasiado generica",
"scope": {
"sourceRef": "/home/pancho/Documentos/Empresa/Desarrollo/IA/docs"
}
}
```
Respuesta esperada resumida:
```json

View file

@ -138,6 +138,21 @@ Eso significa:
---
## Logs de evaluacion
El playground ya soporta dos vías de logging:
1. `log automatico`
- cuando la respuesta o el contexto indican insuficiencia relevante
2. `log manual`
- cuando el usuario pulsa el botón para registrar la consulta actual
- puede añadir una nota explicativa propia
Los logs quedan guardados en `Qdrant`, por lo que no dependen del filesystem efímero del contenedor.
---
## Idea de uso
Este playground no sustituye a clientes finales ni al futuro MCP.

View file

@ -5,6 +5,7 @@ const replaceBootstrapButton = document.getElementById("replaceBootstrapButton")
const clearBootstrapButton = document.getElementById("clearBootstrapButton");
const sendChatButton = document.getElementById("sendChatButton");
const clearChatButton = document.getElementById("clearChatButton");
const manualLogButton = document.getElementById("manualLogButton");
const presetDocs = document.getElementById("presetDocs");
const presetRagDocs = document.getElementById("presetRagDocs");
const presetCode = document.getElementById("presetCode");
@ -19,6 +20,7 @@ const chatMessages = document.getElementById("chatMessages");
const contextIndicator = document.getElementById("contextIndicator");
const contextStatusText = document.getElementById("contextStatusText");
const contextScopeText = document.getElementById("contextScopeText");
const logsResult = document.getElementById("logsResult");
const ingestSourceType = document.getElementById("ingestSourceType");
const ingestScopeMode = document.getElementById("ingestScopeMode");
@ -45,10 +47,12 @@ const compareWithoutRag = document.getElementById("compareWithoutRag");
const chatMode = document.getElementById("chatMode");
const chatScopeInfo = document.getElementById("chatScopeInfo");
const chatInput = document.getElementById("chatInput");
const manualLogNote = document.getElementById("manualLogNote");
let lastBootstrapContext = null;
let chatHistory = [];
let availableScopes = [];
let lastInteraction = null;
function format(value) {
return JSON.stringify(value, null, 2);
@ -221,6 +225,15 @@ async function loadAnswerModels() {
}
}
/**
 * Fetches the recent evaluation logs from `GET /logs/recent` and renders
 * them, pretty-printed, into the `logsResult` element.
 *
 * Any failure (network error, non-2xx status, invalid JSON) is rendered into
 * the same element instead of being thrown, so callers can await this
 * function without their own error handling.
 */
async function loadRecentLogs() {
  try {
    const response = await fetch("/logs/recent");
    if (!response.ok) {
      // Surface the HTTP status and raw body instead of attempting to parse
      // what may be a non-JSON error page.
      const body = await response.text();
      throw new Error(`GET /logs/recent failed with HTTP ${response.status}: ${body}`);
    }
    const logs = await response.json();
    logsResult.textContent = format(logs);
  } catch (error) {
    logsResult.textContent = String(error);
  }
}
document.querySelectorAll(".tab-button").forEach((button) => {
button.addEventListener("click", () => {
document.querySelectorAll(".tab-button").forEach((entry) => entry.classList.remove("active"));
@ -287,6 +300,7 @@ ingestButton.addEventListener("click", async () => {
ingestResult.textContent = format(data);
await loadScopes();
await loadRecentLogs();
updateIngestUiState();
} catch (error) {
ingestResult.textContent = String(error);
@ -308,6 +322,19 @@ async function executeBootstrap() {
lastBootstrapContext = data;
bootstrapResult.textContent = format(data);
renderBootstrapContext();
lastInteraction = {
operation: "retrieve",
query: bootstrapQuery.value,
mode: bootstrapMode.value,
intent: "bootstrap",
model: answerModel.value,
scope: buildScopeFromInputs(),
usedBootstrapContext: false,
usedAdditionalRetrieve: useModelInRetrieve.checked,
responseSummary: data.modelSummary || data.summary,
retrievedItems: data.items || []
};
await loadRecentLogs();
} catch (error) {
bootstrapResult.textContent = String(error);
}
@ -348,6 +375,19 @@ sendChatButton.addEventListener("click", async () => {
chatHistory.push({ role: "assistant", content: response.answer });
renderChatHistory();
mainResult.textContent = format(response);
lastInteraction = {
operation: "chat",
query: message,
mode: chatMode.value,
intent: "specific",
model: answerModel.value,
scope: buildScopeFromInputs(),
usedBootstrapContext: response.usedBootstrapContext,
usedAdditionalRetrieve: response.usedAdditionalRetrieve,
responseSummary: response.answer,
retrievedItems: response.retrieved?.items || []
};
await loadRecentLogs();
if (compareWithoutRag.checked) {
const comparison = await request("/answer/direct", {
@ -374,6 +414,25 @@ clearChatButton.addEventListener("click", () => {
compareResult.textContent = "Desactivada.";
});
// Manual log button: posts the last recorded interaction to /logs/manual,
// tagged as a manual review request, with the optional note typed by the user.
manualLogButton.addEventListener("click", async () => {
  if (lastInteraction) {
    try {
      // An empty or whitespace-only note is sent as `undefined`.
      const trimmedNote = manualLogNote.value.trim();
      const payload = {
        ...lastInteraction,
        reason: "manual_review_requested",
        note: trimmedNote || undefined
      };
      const savedEntry = await request("/logs/manual", payload);
      logsResult.textContent = format(savedEntry);
      // Refresh the panel so the stored entry appears in the recent list.
      await loadRecentLogs();
    } catch (failure) {
      logsResult.textContent = String(failure);
    }
    return;
  }

  // Nothing has been run yet in this session, so there is nothing to log.
  mainResult.textContent = "No hay una consulta previa para registrar en logs.";
});
presetDocs.addEventListener("click", () => {
applyPreset(
"documental",
@ -403,6 +462,7 @@ presetCode.addEventListener("click", () => {
loadScopes();
loadAnswerModels();
loadRecentLogs();
renderBootstrapContext();
renderChatHistory();
updateIngestUiState();

View file

@ -175,6 +175,14 @@
<button id="clearChatButton" class="secondary">Limpiar chat</button>
</div>
<label>Nota opcional para log manual
<textarea id="manualLogNote" rows="3" placeholder="Ejemplo: la respuesta no usa bien el contexto del libro y parece demasiado generica"></textarea>
</label>
<div class="actions">
<button id="manualLogButton" class="secondary">Registrar esta consulta en logs</button>
</div>
<h3>Ultima respuesta estructurada</h3>
<pre id="mainResult">Sin ejecutar aun.</pre>
@ -192,6 +200,11 @@
<h2>Estado / health</h2>
<pre id="healthResult">Sin comprobar.</pre>
</section>
<section class="panel">
<h2>Logs recientes</h2>
<pre id="logsResult">Sin cargar aun.</pre>
</section>
</main>
<script src="/playground/app.js" type="module"></script>

View file

@ -9,6 +9,7 @@ import { env } from "./config/env.js";
import { AnswerService } from "./modules/answer/service.js";
import { IngestService } from "./modules/ingest/service.js";
import { OpenRouterEmbeddingProvider } from "./modules/embeddings/provider.js";
import { EvaluationLogService } from "./modules/logs/service.js";
import { documentalChunkingPolicy } from "./modules/process/chunking.js";
import { RetrieveService } from "./modules/retrieve/service.js";
import { supportedParserExtensions } from "./modules/parsers/parser-registry.js";
@ -27,10 +28,20 @@ export function createApp() {
const upload = multer({ storage: multer.memoryStorage() });
const embeddingProvider = new OpenRouterEmbeddingProvider();
const vectorStore = new QdrantVectorStoreClient();
const evaluationLogs = new EvaluationLogService(embeddingProvider);
const ingestService = new IngestService(embeddingProvider, vectorStore);
const retrieveService = new RetrieveService(embeddingProvider, vectorStore);
const answerService = new AnswerService(retrieveService);
/**
 * Decides whether a retrieve/answer/chat result should be recorded as an
 * automatic "insufficient context" evaluation log.
 *
 * @param summary - Summary text produced for the result, if any.
 * @param itemsCount - Number of retrieved items or citations backing the result.
 * @param answer - Generated answer text, if any.
 * @returns `true` when nothing was retrieved, or when the summary/answer
 *   contains one of the known "insufficient context" phrases.
 */
function needsContextLog(summary?: string, itemsCount = 0, answer?: string): boolean {
  if (itemsCount === 0) {
    return true;
  }

  // Decompose and strip combining diacritics so accented text such as
  // "no hay información suficiente" matches the unaccented patterns below.
  const text = `${summary ?? ""} ${answer ?? ""}`
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");

  return /no se recupero contexto|no hay informacion suficiente|no dispongo de mas detalles|contexto insuficiente/i.test(text);
}
app.use(express.json({ limit: "5mb" }));
app.use(express.static(publicDir));
@ -79,6 +90,16 @@ export function createApp() {
}
});
// GET /logs/recent — returns the most recent evaluation logs as JSON.
// Accepts an optional `limit` query parameter. Any value that is not a number
// >= 1 (missing, empty, non-numeric, zero, negative, repeated parameter)
// falls back to the default of 20; fractional values are floored.
app.get("/logs/recent", async (req, res) => {
  try {
    const requestedLimit = Number(req.query.limit);
    const limit = Number.isFinite(requestedLimit) && requestedLimit >= 1 ? Math.floor(requestedLimit) : 20;
    const logs = await evaluationLogs.listRecent(limit);
    res.json(logs);
  } catch (error) {
    res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown logs error" });
  }
});
app.post("/ingest", async (req, res) => {
try {
const result = await ingestService.ingest(req.body);
@ -142,15 +163,48 @@ export function createApp() {
}
: undefined;
const result = await retrieveService.retrieve(mode, intent, query, scope);
const items = result.items;
if (useModel) {
const modelSummary = await answerService.summarizeRetrieve(query, result, model);
res.json({
const payload = {
...result,
model: modelSummary.model,
modelSummary: modelSummary.summary
};
if (needsContextLog(payload.modelSummary, items.length)) {
await evaluationLogs.log({
trigger: "automatic",
operation: "retrieve",
reason: "retrieve_context_insufficient",
query,
mode,
intent,
scope,
model: payload.model,
responseSummary: payload.modelSummary,
retrievedItems: items
});
}
res.json(payload);
return;
}
if (needsContextLog(result.summary, items.length)) {
await evaluationLogs.log({
trigger: "automatic",
operation: "retrieve",
reason: "retrieve_context_insufficient",
query,
mode,
intent,
scope,
responseSummary: result.summary,
retrievedItems: items
});
}
res.json(result);
} catch (error) {
res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown retrieve error" });
@ -172,6 +226,34 @@ export function createApp() {
const model = req.body.model ? String(req.body.model) : undefined;
const preloadedContext = req.body.preloadedContext ? String(req.body.preloadedContext) : undefined;
const result = await answerService.answer(mode, intent, query, scope, model, preloadedContext);
if (needsContextLog(result.summary, result.citations.length, result.answer)) {
await evaluationLogs.log({
trigger: "automatic",
operation: "answer",
reason: "answer_context_insufficient",
query,
mode,
intent,
scope,
model: result.model,
note: preloadedContext ? "bootstrap_context_present" : undefined,
usedBootstrapContext: Boolean(preloadedContext),
responseSummary: result.answer,
retrievedItems: result.citations.map((citation) => ({
chunkId: citation.chunkId,
documentId: citation.documentId,
sourceId: scope?.sourceId ?? "",
title: citation.title,
sectionTitle: citation.sectionTitle,
content: "",
score: 0,
startLine: citation.startLine,
endLine: citation.endLine
}))
});
}
res.json(result);
} catch (error) {
res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown answer error" });
@ -224,11 +306,52 @@ export function createApp() {
allowAdditionalRetrieve
});
if (needsContextLog(result.retrieved?.summary, result.retrieved?.items.length ?? 0, result.answer)) {
await evaluationLogs.log({
trigger: "automatic",
operation: "chat",
reason: "chat_context_insufficient",
query: message,
mode,
intent: "specific",
scope,
model: result.model,
usedBootstrapContext: result.usedBootstrapContext,
usedAdditionalRetrieve: result.usedAdditionalRetrieve,
responseSummary: result.answer,
retrievedItems: result.retrieved?.items ?? []
});
}
res.json(result);
} catch (error) {
res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown chat error" });
}
});
// POST /logs/manual — stores a manually requested evaluation log entry.
// The body mirrors the fields read below; `operation` falls back to "answer"
// unless it is "chat" or "retrieve", and `reason` defaults to
// "manual_review_requested". Responds 201 with the stored entry, or 500 with
// `{ ok: false, error }` when logging fails.
app.post("/logs/manual", async (req, res) => {
  try {
    // A request without a parsed JSON body must not crash on property access.
    const body = req.body ?? {};
    const query = String(body.query ?? "");

    // Keep only object entries: null or primitive items would throw later,
    // when the log service reads `chunkId`/`documentId` from each item.
    const retrievedItems = Array.isArray(body.retrievedItems)
      ? body.retrievedItems.filter((item: unknown) => typeof item === "object" && item !== null)
      : [];

    const entry = await evaluationLogs.log({
      trigger: "manual",
      operation: body.operation === "chat" || body.operation === "retrieve" ? body.operation : "answer",
      reason: body.reason ? String(body.reason) : "manual_review_requested",
      query,
      mode: body.mode,
      intent: body.intent,
      scope: body.scope,
      model: body.model,
      note: body.note ? String(body.note) : undefined,
      usedBootstrapContext: Boolean(body.usedBootstrapContext),
      usedAdditionalRetrieve: Boolean(body.usedAdditionalRetrieve),
      responseSummary: body.responseSummary ? String(body.responseSummary) : undefined,
      retrievedItems
    });
    res.status(201).json(entry);
  } catch (error) {
    res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown manual log error" });
  }
});
return app;
}

View file

@ -17,6 +17,7 @@ export const env = {
qdrantUrl: requireEnv("QDRANT_URL", "http://localhost:6333"),
qdrantApiKey: process.env.QDRANT_API_KEY ?? "",
qdrantCollection: requireEnv("QDRANT_COLLECTION", "rag_chunks"),
qdrantLogsCollection: requireEnv("QDRANT_LOGS_COLLECTION", "rag_eval_logs"),
embeddingProvider: requireEnv("EMBEDDING_PROVIDER", "openrouter"),
embeddingModel: requireEnv("EMBEDDING_MODEL", "qwen/qwen3-embedding-8b"),
embeddingBaseUrl: requireEnv("EMBEDDING_BASE_URL", "https://openrouter.ai/api/v1"),

View file

@ -0,0 +1,101 @@
import { createHash } from "node:crypto";
import type { QdrantClient } from "@qdrant/js-client-rest";
import { env } from "../../config/env.js";
import type { EmbeddingProvider } from "../embeddings/provider.js";
import { buildQdrantClient } from "../vectorstore/client.js";
import type { EvaluationLogEntry, EvaluationLogInput } from "../../shared/types/rag.js";
/**
 * Builds a UUID-shaped identifier (8-4-4-4-12 hex groups) for a log entry.
 *
 * The id is the first 32 hex characters of a SHA-1 digest over the current
 * timestamp plus the entry's operation, query and reason.
 *
 * @param input - Log input whose operation, query and reason seed the digest.
 * @returns The hyphenated 36-character identifier.
 */
function buildLogId(input: EvaluationLogInput): string {
  const seedParts = [Date.now(), input.operation, input.query, input.reason].map(String);
  const digest = createHash("sha1").update(seedParts.join("::")).digest("hex");

  // [start, end) offsets of each hyphen-separated group within the digest.
  const groupBounds: Array<[number, number]> = [
    [0, 8],
    [8, 12],
    [12, 16],
    [16, 20],
    [20, 32]
  ];

  return groupBounds.map(([start, end]) => digest.slice(start, end)).join("-");
}
/**
 * Stores and lists evaluation log entries as points in the Qdrant collection
 * named by `env.qdrantLogsCollection`.
 *
 * Each entry is stored with an embedding of its query (or of its reason when
 * the query is empty) as the point vector and the entry itself as payload.
 */
export class EvaluationLogService {
  private readonly client: QdrantClient;
  // Set once the logs collection has been checked/created by this instance.
  private collectionReady = false;

  /**
   * @param embeddingProvider - Provider used to embed the text of each entry.
   */
  constructor(private readonly embeddingProvider: EmbeddingProvider) {
    this.client = buildQdrantClient();
  }

  /**
   * Persists one evaluation log entry.
   *
   * @param input - Data describing the interaction to record.
   * @returns The stored entry, including its generated `id` and `createdAt`.
   * @throws Error when the embedding provider returns no usable vector; errors
   *   from the embedding provider or the Qdrant client propagate unchanged.
   */
  async log(input: EvaluationLogInput): Promise<EvaluationLogEntry> {
    const [vector] = await this.embeddingProvider.embed([input.query || input.reason]);
    if (!vector || vector.length === 0) {
      // Fail with an explicit message rather than a TypeError on `.length`.
      throw new Error("Embedding provider returned no vector for the evaluation log entry");
    }
    await this.ensureCollection(vector.length);

    const retrievedItems = input.retrievedItems ?? [];
    const entry: EvaluationLogEntry = {
      id: buildLogId(input),
      trigger: input.trigger,
      operation: input.operation,
      reason: input.reason,
      query: input.query,
      mode: input.mode,
      intent: input.intent,
      model: input.model,
      note: input.note,
      createdAt: new Date().toISOString(),
      scope: input.scope,
      usedBootstrapContext: input.usedBootstrapContext,
      usedAdditionalRetrieve: input.usedAdditionalRetrieve,
      responseSummary: input.responseSummary,
      // Only identifiers and a count are stored, not the retrieved content.
      retrievedItemsCount: retrievedItems.length,
      chunkIds: retrievedItems.map((item) => item.chunkId),
      documentIds: [...new Set(retrievedItems.map((item) => item.documentId))]
    };

    await this.client.upsert(env.qdrantLogsCollection, {
      wait: true,
      points: [{
        id: entry.id,
        vector,
        payload: entry as unknown as Record<string, unknown>
      }]
    });

    return entry;
  }

  /**
   * Lists stored entries, newest `createdAt` first.
   *
   * @param limit - Maximum number of entries to return. Values that are not a
   *   finite number >= 1 fall back to 20; the effective maximum is 100.
   * @returns Up to `limit` entries, or an empty array when the logs
   *   collection does not exist yet.
   */
  async listRecent(limit = 20): Promise<EvaluationLogEntry[]> {
    // Never forward NaN, zero or negative limits to the scroll request.
    const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.min(Math.floor(limit), 100) : 20;

    const collections = await this.client.getCollections();
    const exists = collections.collections.some((collection) => collection.name === env.qdrantLogsCollection);
    if (!exists) {
      return [];
    }

    // NOTE(review): the scroll request carries no ordering, so once the
    // collection holds more points than the fetched window the result may not
    // be the globally newest entries — confirm against Qdrant scroll semantics.
    const response = await this.client.scroll(env.qdrantLogsCollection, {
      limit: Math.min(safeLimit * 3, 100),
      with_payload: true
    });

    return response.points
      .map((point) => point.payload as unknown as EvaluationLogEntry)
      .filter(Boolean)
      // Tolerate payloads without `createdAt` instead of throwing mid-sort;
      // such entries sort last.
      .sort((left, right) => (right.createdAt ?? "").localeCompare(left.createdAt ?? ""))
      .slice(0, safeLimit);
  }

  /**
   * Creates the logs collection on first use when it does not exist yet.
   *
   * @param vectorSize - Dimension of the vectors that will be stored.
   */
  private async ensureCollection(vectorSize: number): Promise<void> {
    if (this.collectionReady) {
      return;
    }

    const collections = await this.client.getCollections();
    const exists = collections.collections.some((collection) => collection.name === env.qdrantLogsCollection);

    if (!exists) {
      await this.client.createCollection(env.qdrantLogsCollection, {
        vectors: {
          size: vectorSize,
          distance: "Cosine"
        }
      });

      // Best-effort: a failure to create the payload index is ignored.
      await this.client.createPayloadIndex(env.qdrantLogsCollection, {
        field_name: "createdAt",
        field_schema: "keyword"
      }).catch(() => undefined);
    }

    this.collectionReady = true;
  }
}

View file

@ -2,7 +2,7 @@ import { QdrantClient } from "@qdrant/js-client-rest";
import { env } from "../../config/env.js";
import type { AvailableScope, IngestedChunk, RetrieveScope, RetrievedItem } from "../../shared/types/rag.js";
function buildQdrantClient(): QdrantClient {
export function buildQdrantClient(): QdrantClient {
const url = new URL(env.qdrantUrl);
return new QdrantClient({
host: url.hostname,

View file

@ -105,3 +105,39 @@ export interface ChatResponse {
usedAdditionalRetrieve: boolean;
retrieved?: RetrieveResponse;
}
/**
 * Data handed to the evaluation log service to record one interaction.
 */
export interface EvaluationLogInput {
  // --- What is being logged and why ---

  /** Whether the entry was produced automatically or requested manually. */
  trigger: "automatic" | "manual";
  /** Kind of operation the entry refers to. */
  operation: "retrieve" | "answer" | "chat";
  /** Short code explaining why the entry was recorded. */
  reason: string;
  /** Query or chat message the entry refers to. */
  query: string;
  /** Optional free-text note attached to the entry. */
  note?: string;

  // --- Request parameters of the interaction ---

  mode?: ChunkMode;
  intent?: RetrieveIntent;
  scope?: RetrieveScope;
  model?: string;

  // --- Outcome of the interaction ---

  usedBootstrapContext?: boolean;
  usedAdditionalRetrieve?: boolean;
  /** Summary or answer text produced for the interaction. */
  responseSummary?: string;
  /** Items retrieved for the interaction, when available. */
  retrievedItems?: RetrievedItem[];
}
/**
 * Evaluation log entry as stored and returned by the log service.
 */
export interface EvaluationLogEntry {
  // --- Identity ---

  /** Identifier of the stored entry. */
  id: string;
  /** Creation timestamp as a string. */
  createdAt: string;

  // --- What was logged and why ---

  trigger: "automatic" | "manual";
  operation: "retrieve" | "answer" | "chat";
  reason: string;
  query: string;
  note?: string;

  // --- Request parameters of the interaction ---

  mode?: ChunkMode;
  intent?: RetrieveIntent;
  scope?: RetrieveScope;
  model?: string;

  // --- Outcome of the interaction ---

  usedBootstrapContext?: boolean;
  usedAdditionalRetrieve?: boolean;
  responseSummary?: string;
  /** Number of items that were retrieved for the interaction. */
  retrievedItemsCount: number;
  /** Chunk identifiers of the retrieved items. */
  chunkIds: string[];
  /** Document identifiers of the retrieved items. */
  documentIds: string[];
}

View file

@ -66,6 +66,7 @@ Este archivo registra agentes y sesiones de trabajo de este workspace.
- Ajuste de la API y del playground para hacer visible y seleccionable el modelo de `answer`, evitando dejarlo oculto como una decision fija del backend.
- Evolucion del playground a una mecanica mas completa con pestañas `Ingesta / Bootstrap / Chat`, indicador visual de contexto activo y endpoint `/chat` con bootstrap reutilizable y consultas adicionales al RAG durante la conversacion.
- Ampliacion de la ingesta y del playground para soportar upload directo de archivos y `sourceId` personalizado, permitiendo aislar documentos ajenos al RAG en scopes separados.
- Implementacion de logs de evaluacion persistentes en `Qdrant`, con disparo automatico por contexto insuficiente y registro manual con nota desde el playground.
- Reorganizacion de RAG como modulo raiz independiente con documentacion propia en `RAG/docs/`.
- Ajuste del indice documental global para reflejar la separacion entre documentacion global y documentacion por tool.
- Creacion de `docs/TASK.md` para descomponer lineas de trabajo amplias en puntos de analisis y acuerdos.