From b7846d89edf091c230de6cdb6331d168e0e549ac Mon Sep 17 00:00:00 2001 From: Paco POR-CORREO Date: Mon, 6 Apr 2026 15:51:11 +0200 Subject: [PATCH] Add upload and bootstrap chat workflow to playground --- RAG/docs/API_RAG.md | 39 +++++++++++ RAG/docs/PLAYGROUND.md | 11 +++ RAG/package-lock.json | 112 ++++++++++++++++++++++++++++++ RAG/package.json | 2 + RAG/public/playground/app.js | 37 ++++++++-- RAG/public/playground/index.html | 7 ++ RAG/public/playground/styles.css | 6 ++ RAG/src/app.ts | 48 +++++++++++++ RAG/src/modules/ingest/service.ts | 2 +- RAG/src/shared/types/rag.ts | 1 + docs/HISTORIAL_SESIONES.md | 1 + 11 files changed, 259 insertions(+), 7 deletions(-) diff --git a/RAG/docs/API_RAG.md b/RAG/docs/API_RAG.md index d572975..2560d05 100644 --- a/RAG/docs/API_RAG.md +++ b/RAG/docs/API_RAG.md @@ -81,8 +81,10 @@ Payload base: Campos: +- `sourceId`: opcional, permite aislar la fuente en un scope propio - `sourceType`: `file` o `folder` - `sourceRef`: ruta de la fuente +- `readPath`: uso interno cuando se sube un archivo y el backend lo procesa desde una ruta temporal - `mode`: `mechanical` o `interactive` - `tags`: etiquetas opcionales para clasificar la fuente @@ -124,6 +126,20 @@ Respuesta esperada aproximada: } ``` +Ejemplo de aislamiento en un scope propio: + +```bash +curl -sS -X POST "https://rag.por-correo.com/ingest" \ + -H "Content-Type: application/json" \ + -d '{ + "sourceId": "src:default:manual:pdf-tecnico-cliente-a", + "sourceType": "file", + "sourceRef": "/ruta/a/documento.pdf", + "mode": "mechanical", + "tags": ["cliente-a", "pdf-tecnico"] + }' +``` + --- ### 3. `POST /retrieve` @@ -164,6 +180,9 @@ Campos: - `sourceRef` - `tags` +Nota util: +- si una ingesta se hizo con `sourceId` propio, despues puedes consultar solo ese material usando `scope.sourceId` + #### Ejemplo `retrieve` documental ```bash @@ -368,6 +387,26 @@ Payload base: } ``` +--- + +### 7. `POST /ingest/upload` + +Sirve para subir un archivo directamente al backend desde una interfaz web o cliente compatible con `multipart/form-data`. + +Es util para: +- PDFs o documentos sueltos +- pruebas rapidas desde el playground +- crear un scope aislado para una carga concreta + +Campos esperados del formulario: + +- `file` +- `sourceId` opcional +- `mode` +- `tags` + +Si se usa `sourceId`, el archivo subido no se mezcla con otros scopes salvo que elijas reutilizar ese mismo identificador. + Respuesta esperada resumida: ```json diff --git a/RAG/docs/PLAYGROUND.md b/RAG/docs/PLAYGROUND.md index 51790aa..b7443bb 100644 --- a/RAG/docs/PLAYGROUND.md +++ b/RAG/docs/PLAYGROUND.md @@ -70,6 +70,8 @@ El playground ya no funciona como una sola caja de consulta tecnica. Ahora se or 1. `Ingesta` - lanzar ingesta documental o de codigo +- subir archivos directamente desde el navegador +- definir un `sourceId` propio para aislar una carga concreta 2. `Bootstrap` - elegir scope @@ -93,6 +95,15 @@ En la pestaña `Chat` hay un indicador visual: Tambien se muestra el `scope` actualmente cargado. +### Aislamiento de scopes en ingesta + +En la pestaña `Ingesta` ya se puede: + +- indicar un `sourceId` propio +- subir un archivo local directamente + +Esto permite probar documentos o PDFs que no tengan nada que ver con el RAG sin mezclarlos con el resto de scopes si eliges un identificador especifico para esa carga. + ### Chat con consultas adicionales al RAG El chat ya soporta dos niveles: diff --git a/RAG/package-lock.json b/RAG/package-lock.json index ad697ea..3393d17 100644 --- a/RAG/package-lock.json +++ b/RAG/package-lock.json @@ -11,11 +11,13 @@ "@qdrant/js-client-rest": "^1.15.0", "dotenv": "^16.4.5", "express": "^4.21.2", + "multer": "^2.0.0", "openai": "^4.104.0", "pdf-parse": "^1.1.1" }, "devDependencies": { "@types/express": "^5.0.1", + "@types/multer": "^1.4.12", "@types/node": "^22.15.3", "@types/pdf-parse": "^1.1.4", "tsx": "^4.19.3", @@ -544,6 +546,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/multer": { + "version": "1.4.13", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-1.4.13.tgz", + "integrity": "sha512-bhhdtPw7JqCiEfC9Jimx5LqX9BDIPJEh2q/fQ4bqbBPtyEZYr3cvF22NwG0DmPZNYA0CAf2CnqDB4KIGGpJcaw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/express": "*" + } + }, "node_modules/@types/node": { "version": "22.19.17", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz", @@ -645,6 +657,12 @@ "node": ">= 8.0.0" } }, + "node_modules/append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==", + "license": "MIT" + }, "node_modules/array-flatten": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", @@ -681,6 +699,23 @@ "npm": "1.2.8000 || >= 1.4.16" } }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -731,6 +766,21 @@ "node": ">= 0.8" } }, + "node_modules/concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "engines": [ + "node >= 6.0" + ], + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, "node_modules/content-disposition": { "version": "0.5.4", "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", @@ -1329,6 +1379,25 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "license": "MIT" }, + "node_modules/multer": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multer/-/multer-2.1.1.tgz", + "integrity": "sha512-mo+QTzKlx8R7E5ylSXxWzGoXoZbOsRMpyitcht8By2KHvMbf3tjwosZ/Mu/XYU6UuJ3VZnODIrak5ZrPiPyB6A==", + "license": "MIT", + "dependencies": { + "append-field": "^1.0.0", + "busboy": "^1.6.0", + "concat-stream": "^2.0.0", + "type-is": "^1.6.18" + }, + "engines": { + "node": ">= 10.16.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", @@ -1536,6 +1605,20 @@ "node": ">= 0.8" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -1704,6 +1787,23 @@ "node": ">= 0.8" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", @@ -1752,6 +1852,12 @@ "node": ">= 0.6" } }, + "node_modules/typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA==", + "license": "MIT" + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -1789,6 +1895,12 @@ "node": ">= 0.8" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/utils-merge": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", diff --git a/RAG/package.json b/RAG/package.json index 2967b05..e278ba3 100644 --- a/RAG/package.json +++ b/RAG/package.json @@ -13,11 +13,13 @@ "@qdrant/js-client-rest": "^1.15.0", "dotenv": "^16.4.5", "express": "^4.21.2", + "multer": "^2.0.0", "openai": "^4.104.0", "pdf-parse": "^1.1.1" }, "devDependencies": { "@types/express": "^5.0.1", + "@types/multer": "^1.4.12", "@types/node": "^22.15.3", "@types/pdf-parse": "^1.1.4", "tsx": "^4.19.3", diff --git a/RAG/public/playground/app.js b/RAG/public/playground/app.js index 21207de..951781e 100644 --- a/RAG/public/playground/app.js +++ b/RAG/public/playground/app.js @@ -21,7 +21,9 @@ const contextStatusText = document.getElementById("contextStatusText"); const contextScopeText = document.getElementById("contextScopeText"); const ingestSourceType = document.getElementById("ingestSourceType"); +const ingestSourceId = document.getElementById("ingestSourceId"); const ingestSourceRef = document.getElementById("ingestSourceRef"); +const ingestUploadFile = document.getElementById("ingestUploadFile"); const ingestMode = document.getElementById("ingestMode"); const ingestTags = document.getElementById("ingestTags"); @@ -199,12 +201,35 @@ healthButton.addEventListener("click", async () => { ingestButton.addEventListener("click", async () => { ingestResult.textContent = "Ejecutando ingesta..."; try { - const data = await request("/ingest", { - sourceType: ingestSourceType.value, - sourceRef: ingestSourceRef.value, - mode: ingestMode.value, - tags: splitTags(ingestTags.value) - }); + let data; + + if (ingestUploadFile.files && ingestUploadFile.files[0]) { + const formData = new FormData(); + formData.append("file", ingestUploadFile.files[0]); + formData.append("mode", ingestMode.value); + formData.append("tags", splitTags(ingestTags.value).join(",")); + if (ingestSourceId.value.trim()) { + formData.append("sourceId", ingestSourceId.value.trim()); + } + + const response = await fetch("/ingest/upload", { + method: "POST", + body: formData + }); + data = await response.json(); + if (!response.ok) { + throw new Error(data.error || `HTTP ${response.status}`); + } + } else { + data = await request("/ingest", { + sourceId: ingestSourceId.value.trim() || undefined, + sourceType: ingestSourceType.value, + sourceRef: ingestSourceRef.value, + mode: ingestMode.value, + tags: splitTags(ingestTags.value) + }); + } + ingestResult.textContent = format(data); await loadScopes(); } catch (error) { diff --git a/RAG/public/playground/index.html b/RAG/public/playground/index.html index e20c5fd..7e193e4 100644 --- a/RAG/public/playground/index.html +++ b/RAG/public/playground/index.html @@ -27,6 +27,9 @@

Ingesta

+ +
+

Si seleccionas un archivo local, el playground usara upload directo y podras aislarlo con un `sourceId` propio para no mezclarlo con otros scopes.

diff --git a/RAG/public/playground/styles.css b/RAG/public/playground/styles.css index c8fb3d1..976f107 100644 --- a/RAG/public/playground/styles.css +++ b/RAG/public/playground/styles.css @@ -79,6 +79,12 @@ body { h1, h2, h3 { margin: 0 0 12px; } .lead { margin: 0; color: var(--muted); max-width: 760px; } +.helper { + color: var(--muted); + font-size: 13px; + margin: 0 0 16px; +} + .grid { display: grid; gap: 24px; diff --git a/RAG/src/app.ts b/RAG/src/app.ts index a99d46c..d0fada5 100644 --- a/RAG/src/app.ts +++ b/RAG/src/app.ts @@ -1,4 +1,8 @@ import express from "express"; +import multer from "multer"; +import type { Request } from "express"; +import { writeFile, unlink } from "node:fs/promises"; +import os from "node:os"; import path from "node:path"; import { fileURLToPath } from "node:url"; import { env } from "./config/env.js"; @@ -11,11 +15,16 @@ import { supportedParserExtensions } from "./modules/parsers/parser-registry.js" import { QdrantVectorStoreClient } from "./modules/vectorstore/client.js"; import type { ChatMessage, ChunkMode, RetrieveIntent, RetrieveScope } from "./shared/types/rag.js"; +type UploadRequest = Request & { + file?: Express.Multer.File; +}; + export function createApp() { const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const publicDir = path.resolve(__dirname, "../public"); const app = express(); + const upload = multer({ storage: multer.memoryStorage() }); const embeddingProvider = new OpenRouterEmbeddingProvider(); const vectorStore = new QdrantVectorStoreClient(); const ingestService = new IngestService(embeddingProvider, vectorStore); @@ -79,6 +88,45 @@ export function createApp() { } }); + app.post("/ingest/upload", upload.single("file"), async (req: UploadRequest, res) => { + let tempPath: string | undefined; + + try { + if (!req.file) { + res.status(400).json({ ok: false, error: "Missing file upload" }); + return; + } + + const tempDir = await os.tmpdir(); + tempPath = path.join(tempDir, `${Date.now()}-${req.file.originalname}`); + await writeFile(tempPath, req.file.buffer); + + const tags = typeof req.body.tags === "string" + ? req.body.tags.split(",").map((entry: string) => entry.trim()).filter(Boolean) + : []; + + const result = await ingestService.ingest({ + sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined, + sourceType: "file", + sourceRef: req.file.originalname, + readPath: tempPath, + mode: req.body.mode === "interactive" ? "interactive" : "mechanical", + tags + }); + + res.status(202).json({ + ...result, + uploadedFile: req.file.originalname + }); + } catch (error) { + res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown upload ingest error" }); + } finally { + if (tempPath) { + await unlink(tempPath).catch(() => undefined); + } + } + }); + app.post("/retrieve", async (req, res) => { try { const mode = (req.body.mode ?? "auto") as ChunkMode; diff --git a/RAG/src/modules/ingest/service.ts b/RAG/src/modules/ingest/service.ts index a6461fd..c0732f9 100644 --- a/RAG/src/modules/ingest/service.ts +++ b/RAG/src/modules/ingest/service.ts @@ -84,7 +84,7 @@ export class IngestService { private async resolveInputFiles(source: IngestSourceInput): Promise { if (source.sourceType === "file") { - return [source.sourceRef]; + return [source.readPath ?? source.sourceRef]; } return listFilesRecursively(source.sourceRef); } diff --git a/RAG/src/shared/types/rag.ts b/RAG/src/shared/types/rag.ts index a85c285..dea862c 100644 --- a/RAG/src/shared/types/rag.ts +++ b/RAG/src/shared/types/rag.ts @@ -10,6 +10,7 @@ export interface IngestSourceInput { sourceId?: string; sourceType: SourceType; sourceRef: string; + readPath?: string; mode?: IngestMode; tags?: string[]; } diff --git a/docs/HISTORIAL_SESIONES.md b/docs/HISTORIAL_SESIONES.md index f3b394c..94edd18 100644 --- a/docs/HISTORIAL_SESIONES.md +++ b/docs/HISTORIAL_SESIONES.md @@ -65,6 +65,7 @@ Este archivo registra agentes y sesiones de trabajo de este workspace. - Creacion de `RAG/docs/PLAYGROUND.md` para documentar la tecnologia elegida, su ubicacion y su papel dentro del modulo. - Ajuste de la API y del playground para hacer visible y seleccionable el modelo de `answer`, evitando dejarlo oculto como una decision fija del backend. - Evolucion del playground a una mecanica mas completa con pestañas `Ingesta / Bootstrap / Chat`, indicador visual de contexto activo y endpoint `/chat` con bootstrap reutilizable y consultas adicionales al RAG durante la conversacion. +- Ampliacion de la ingesta y del playground para soportar upload directo de archivos y `sourceId` personalizado, permitiendo aislar documentos ajenos al RAG en scopes separados. - Reorganizacion de RAG como modulo raiz independiente con documentacion propia en `RAG/docs/`. - Ajuste del indice documental global para reflejar la separacion entre documentacion global y documentacion por tool. - Creacion de `docs/TASK.md` para descomponer lineas de trabajo amplias en puntos de analisis y acuerdos.