Add local folder upload via zip to playground ingest

This commit is contained in:
Paco POR-CORREO 2026-04-06 22:51:43 +02:00
parent 93a5aee6cb
commit 8a2eceb877
6 changed files with 165 additions and 26 deletions

View file

@ -47,3 +47,4 @@ Dar continuidad al RAG en `RAG/` a partir del estado actual documentado.
- Limpieza ejecutada exitosamente sobre el `scope` del código fuente antiguo (`RAG/src`). - Limpieza ejecutada exitosamente sobre el `scope` del código fuente antiguo (`RAG/src`).
- Reingesta del directorio `RAG/src` con el código actualizado. - Reingesta del directorio `RAG/src` con el código actualizado.
- Documento de seguimiento `RAG/docs/TASK_LIMPIEZA.md` y documentacion API `RAG/docs/API_RAG.md` actualizados. - Documento de seguimiento `RAG/docs/TASK_LIMPIEZA.md` y documentacion API `RAG/docs/API_RAG.md` actualizados.
- Implementación de ingesta directa de carpetas locales desde el playground: el navegador aplica el `.gitignore` de la raíz de la carpeta (más exclusiones fijas como `node_modules/`, `.git/`, `.venv/`, `dist/` y `build/`), empaqueta los archivos restantes en un `.zip` en memoria y el backend usa `adm-zip` para extraerla de forma segura en un directorio temporal antes de la ingesta.

21
RAG/package-lock.json generated
View file

@ -9,6 +9,7 @@
"version": "0.1.0", "version": "0.1.0",
"dependencies": { "dependencies": {
"@qdrant/js-client-rest": "^1.15.0", "@qdrant/js-client-rest": "^1.15.0",
"adm-zip": "^0.5.17",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.2", "express": "^4.21.2",
"multer": "^2.0.0", "multer": "^2.0.0",
@ -16,6 +17,7 @@
"pdf-parse": "^1.1.1" "pdf-parse": "^1.1.1"
}, },
"devDependencies": { "devDependencies": {
"@types/adm-zip": "^0.5.8",
"@types/express": "^5.0.1", "@types/express": "^5.0.1",
"@types/multer": "^1.4.12", "@types/multer": "^1.4.12",
"@types/node": "^22.15.3", "@types/node": "^22.15.3",
@ -493,6 +495,16 @@
"pnpm": ">=8" "pnpm": ">=8"
} }
}, },
"node_modules/@types/adm-zip": {
"version": "0.5.8",
"resolved": "https://registry.npmjs.org/@types/adm-zip/-/adm-zip-0.5.8.tgz",
"integrity": "sha512-RVVH7QvZYbN+ihqZ4kX/dMiowf6o+Jk1fNwiSdx0NahBJLU787zkULhGhJM8mf/obmLGmgdMM0bXsQTmyfbR7Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/body-parser": { "node_modules/@types/body-parser": {
"version": "1.19.6", "version": "1.19.6",
"resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz",
@ -645,6 +657,15 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/adm-zip": {
"version": "0.5.17",
"resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz",
"integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==",
"license": "MIT",
"engines": {
"node": ">=12.0"
}
},
"node_modules/agentkeepalive": { "node_modules/agentkeepalive": {
"version": "4.6.0", "version": "4.6.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",

View file

@ -11,6 +11,7 @@
}, },
"dependencies": { "dependencies": {
"@qdrant/js-client-rest": "^1.15.0", "@qdrant/js-client-rest": "^1.15.0",
"adm-zip": "^0.5.17",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.2", "express": "^4.21.2",
"multer": "^2.0.0", "multer": "^2.0.0",
@ -18,6 +19,7 @@
"pdf-parse": "^1.1.1" "pdf-parse": "^1.1.1"
}, },
"devDependencies": { "devDependencies": {
"@types/adm-zip": "^0.5.8",
"@types/express": "^5.0.1", "@types/express": "^5.0.1",
"@types/multer": "^1.4.12", "@types/multer": "^1.4.12",
"@types/node": "^22.15.3", "@types/node": "^22.15.3",

View file

@ -31,6 +31,10 @@ const ingestSourceIdWrapper = document.getElementById("ingestSourceIdWrapper");
const ingestSourceId = document.getElementById("ingestSourceId"); const ingestSourceId = document.getElementById("ingestSourceId");
const ingestSourceRef = document.getElementById("ingestSourceRef"); const ingestSourceRef = document.getElementById("ingestSourceRef");
const ingestUploadFile = document.getElementById("ingestUploadFile"); const ingestUploadFile = document.getElementById("ingestUploadFile");
const ingestUploadFolder = document.getElementById("ingestUploadFolder");
const btnUploadFile = document.getElementById("btnUploadFile");
const btnUploadFolder = document.getElementById("btnUploadFolder");
const uploadStatusText = document.getElementById("uploadStatusText");
const ingestMode = document.getElementById("ingestMode"); const ingestMode = document.getElementById("ingestMode");
const ingestTags = document.getElementById("ingestTags"); const ingestTags = document.getElementById("ingestTags");
const ingestModeHint = document.getElementById("ingestModeHint"); const ingestModeHint = document.getElementById("ingestModeHint");
@ -63,6 +67,8 @@ let chatHistory = [];
let availableScopes = []; let availableScopes = [];
let lastInteraction = null; let lastInteraction = null;
let currentUploadType = null; // 'file' o 'folder'
/**
 * Serializes a value as pretty-printed JSON using a two-space indent.
 */
function format(value) {
  const indentWidth = 2;
  const prettyJson = JSON.stringify(value, null, indentWidth);
  return prettyJson;
}
@ -77,17 +83,29 @@ function buildScopeLabel(scope) {
} }
/**
 * Refreshes the ingest form so it reflects the current local-upload selection.
 *
 * While a file or a folder is selected, the source type is forced to "file" and
 * the source-type and source-path controls are disabled; the status line and the
 * mode hint describe what will be uploaded. The custom `sourceId` wrapper is
 * shown only when the scope mode is "custom".
 */
function updateIngestUiState() {
  const selectedFiles = ingestUploadFile.files;
  const folderEntries = ingestUploadFolder.files;
  const hasFile = Boolean(selectedFiles && selectedFiles.length > 0);
  const hasFolder = Boolean(folderEntries && folderEntries.length > 0);
  const hasUpload = hasFile || hasFolder;

  ingestSourceType.value = hasUpload ? "file" : ingestSourceType.value;
  ingestSourceType.disabled = hasUpload;
  ingestSourceRef.disabled = hasUpload;
  ingestSourceIdWrapper.style.display = ingestScopeMode.value === "custom" ? "block" : "none";

  // Start from the "nothing selected" texts and override them per upload kind.
  let statusText = "Ningun elemento seleccionado";
  let hintText = "Si seleccionas un archivo o carpeta local, el playground lo subira directamente y podras aislarlo con un `sourceId` propio.";

  if (hasFile) {
    const fileName = selectedFiles[0].name;
    statusText = `Archivo seleccionado: ${fileName}`;
    hintText = `Upload directo activo: se ingerira el archivo local "${fileName}" y se ignorara la ruta remota.`;
  } else if (hasFolder) {
    // The first segment of the first entry's relative path is used as the folder name.
    const firstRelativePath = folderEntries[0].webkitRelativePath || "";
    const folderName = firstRelativePath.split('/')[0] || "Carpeta";
    statusText = `Carpeta seleccionada: ${folderName} (${folderEntries.length} archivos totales, se filtraran ignorados)`;
    hintText = `Upload directo activo: se comprimira y subira la carpeta local "${folderName}" y se ignorara la ruta remota.`;
  }

  uploadStatusText.textContent = statusText;
  ingestModeHint.textContent = hintText;
  // Highlight the hint only while an upload is pending.
  ingestModeHint.classList.toggle("strong", hasUpload);
}
@ -285,7 +303,20 @@ cleanupScopeSelect.addEventListener("change", () => {
applySelectedCleanupScope(scope); applySelectedCleanupScope(scope);
}); });
// Picker buttons: each one clears the opposite hidden <input type="file"> so only
// one upload kind can be pending, records which kind was requested, and opens the
// matching native dialog.
//
// Assigning `value = ""` from script does not fire a `change` event, so the form
// is refreshed explicitly right after clearing. Without this, cancelling the newly
// opened dialog would leave the status text and the disabled source controls
// describing a selection that no longer exists.
btnUploadFile.addEventListener("click", () => {
  ingestUploadFolder.value = "";
  currentUploadType = 'file';
  updateIngestUiState();
  ingestUploadFile.click();
});

btnUploadFolder.addEventListener("click", () => {
  ingestUploadFile.value = "";
  currentUploadType = 'folder';
  updateIngestUiState();
  ingestUploadFolder.click();
});
ingestUploadFile.addEventListener("change", updateIngestUiState); ingestUploadFile.addEventListener("change", updateIngestUiState);
ingestUploadFolder.addEventListener("change", updateIngestUiState);
ingestScopeMode.addEventListener("change", updateIngestUiState); ingestScopeMode.addEventListener("change", updateIngestUiState);
scopeEditMode.addEventListener("change", updateScopeEditState); scopeEditMode.addEventListener("change", updateScopeEditState);
@ -304,7 +335,7 @@ ingestButton.addEventListener("click", async () => {
try { try {
let data; let data;
if (ingestUploadFile.files && ingestUploadFile.files[0]) { if (currentUploadType === 'file' && ingestUploadFile.files && ingestUploadFile.files[0]) {
const formData = new FormData(); const formData = new FormData();
formData.append("file", ingestUploadFile.files[0]); formData.append("file", ingestUploadFile.files[0]);
formData.append("mode", ingestMode.value); formData.append("mode", ingestMode.value);
@ -318,9 +349,60 @@ ingestButton.addEventListener("click", async () => {
body: formData body: formData
}); });
data = await response.json(); data = await response.json();
if (!response.ok) { if (!response.ok) throw new Error(data.error || `HTTP ${response.status}`);
throw new Error(data.error || `HTTP ${response.status}`);
} else if (currentUploadType === 'folder' && ingestUploadFolder.files && ingestUploadFolder.files.length > 0) {
ingestResult.textContent = "Empaquetando carpeta local (esto puede tardar unos segundos)...";
const zip = new JSZip();
const ig = ignore();
// Buscar .gitignore en la raiz
const gitignoreFile = Array.from(ingestUploadFolder.files).find(f => f.webkitRelativePath.match(/^[^\/]+\/\.gitignore$/));
if (gitignoreFile) {
const content = await gitignoreFile.text();
ig.add(content);
} }
// Reglas hardcodeadas de seguridad
ig.add(['node_modules/', '.git/', '.venv/', 'dist/', 'build/']);
let addedCount = 0;
for (const file of ingestUploadFolder.files) {
// webkitRelativePath format: "FolderName/path/to/file.ext"
// Le quitamos el primer segmento (FolderName) para validar con ignore correctamente
const relativePath = file.webkitRelativePath.split('/').slice(1).join('/');
if (relativePath && !ig.ignores(relativePath)) {
zip.file(relativePath, file);
addedCount++;
}
}
if (addedCount === 0) {
throw new Error("La carpeta esta vacia o todos sus archivos fueron ignorados (.gitignore, node_modules, etc).");
}
ingestResult.textContent = `Subiendo paquete comprimido con ${addedCount} archivos...`;
const zipBlob = await zip.generateAsync({ type: "blob", compression: "STORE" });
const folderName = ingestUploadFolder.files[0].webkitRelativePath.split('/')[0] || "upload";
const formData = new FormData();
formData.append("file", zipBlob, `${folderName}.zip`);
formData.append("isZipFolder", "true");
formData.append("mode", ingestMode.value);
formData.append("tags", splitTags(ingestTags.value).join(","));
if (ingestScopeMode.value === "custom" && ingestSourceId.value.trim()) {
formData.append("sourceId", ingestSourceId.value.trim());
}
const response = await fetch("/ingest/upload", {
method: "POST",
body: formData
});
data = await response.json();
if (!response.ok) throw new Error(data.error || `HTTP ${response.status}`);
} else { } else {
data = await request("/ingest", { data = await request("/ingest", {
sourceId: ingestScopeMode.value === "custom" ? (ingestSourceId.value.trim() || undefined) : undefined, sourceId: ingestScopeMode.value === "custom" ? (ingestSourceId.value.trim() || undefined) : undefined,

View file

@ -5,6 +5,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>RAG Playground</title> <title>RAG Playground</title>
<link rel="stylesheet" href="/playground/styles.css" /> <link rel="stylesheet" href="/playground/styles.css" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/ignore/5.3.1/ignore.min.js"></script>
</head> </head>
<body> <body>
<main class="layout"> <main class="layout">
@ -52,11 +54,17 @@
<option value="file">file</option> <option value="file">file</option>
</select> </select>
</label> </label>
<label>Ruta de la fuente <label>Ruta de la fuente (remota en servidor)
<input id="ingestSourceRef" value="/home/pancho/Documentos/Empresa/Desarrollo/IA/docs" /> <input id="ingestSourceRef" value="/home/pancho/Documentos/Empresa/Desarrollo/IA/docs" />
</label> </label>
<label>Archivo local a subir <label>Subida directa (sobreescribe la ruta de la fuente)
<input id="ingestUploadFile" type="file" accept=".pdf,.md,.txt,.ts,.tsx,.js,.jsx,.mjs,.cjs,.py,.json,.yml,.yaml" /> <div style="display: flex; gap: 8px; margin-top: 4px;">
<button id="btnUploadFile" class="secondary" style="flex: 1;">Archivo suelto</button>
<button id="btnUploadFolder" class="secondary" style="flex: 1;">Carpeta local</button>
</div>
<input id="ingestUploadFile" type="file" accept=".pdf,.md,.txt,.ts,.tsx,.js,.jsx,.mjs,.cjs,.py,.json,.yml,.yaml" style="display: none;" />
<input id="ingestUploadFolder" type="file" webkitdirectory directory multiple style="display: none;" />
<span id="uploadStatusText" style="display: block; margin-top: 8px; font-size: 13px; color: var(--accent);">Ningún elemento seleccionado</span>
</label> </label>
<label>Modo de ingesta <label>Modo de ingesta
<select id="ingestMode"> <select id="ingestMode">

View file

@ -1,10 +1,11 @@
import express from "express"; import express from "express";
import multer from "multer"; import multer from "multer";
import type { Request } from "express"; import type { Request } from "express";
import { writeFile, unlink } from "node:fs/promises"; import { writeFile, unlink, mkdtemp, rm } from "node:fs/promises";
import os from "node:os"; import os from "node:os";
import path from "node:path"; import path from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import AdmZip from "adm-zip";
import { env } from "./config/env.js"; import { env } from "./config/env.js";
import { AnswerService } from "./modules/answer/service.js"; import { AnswerService } from "./modules/answer/service.js";
import { IngestService } from "./modules/ingest/service.js"; import { IngestService } from "./modules/ingest/service.js";
@ -150,7 +151,8 @@ export function createApp() {
}); });
app.post("/ingest/upload", upload.single("file"), async (req: UploadRequest, res) => { app.post("/ingest/upload", upload.single("file"), async (req: UploadRequest, res) => {
let tempPath: string | undefined; let tempFilePath: string | undefined;
let extractDirPath: string | undefined;
try { try {
if (!req.file) { if (!req.file) {
@ -158,32 +160,55 @@ export function createApp() {
return; return;
} }
const tempDir = await os.tmpdir(); const isZipFolder = req.body.isZipFolder === "true";
tempPath = path.join(tempDir, `${Date.now()}-${req.file.originalname}`); const tempDirBase = await os.tmpdir();
await writeFile(tempPath, req.file.buffer); tempFilePath = path.join(tempDirBase, `${Date.now()}-${req.file.originalname}`);
await writeFile(tempFilePath, req.file.buffer);
const tags = typeof req.body.tags === "string" const tags = typeof req.body.tags === "string"
? req.body.tags.split(",").map((entry: string) => entry.trim()).filter(Boolean) ? req.body.tags.split(",").map((entry: string) => entry.trim()).filter(Boolean)
: []; : [];
const result = await ingestService.ingest({ let result;
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "file", if (isZipFolder) {
sourceRef: req.file.originalname, // Logica para carpetas subidas como ZIP
readPath: tempPath, extractDirPath = await mkdtemp(path.join(tempDirBase, "rag-upload-"));
mode: req.body.mode === "interactive" ? "interactive" : "mechanical", const zip = new AdmZip(tempFilePath);
tags zip.extractAllTo(extractDirPath, true);
});
result = await ingestService.ingest({
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "folder",
sourceRef: req.file.originalname.replace(/\.zip$/i, ""), // Usamos el nombre original sin .zip como ref
readPath: extractDirPath,
mode: req.body.mode === "interactive" ? "interactive" : "mechanical",
tags
});
} else {
// Logica para archivos sueltos
result = await ingestService.ingest({
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "file",
sourceRef: req.file.originalname,
readPath: tempFilePath,
mode: req.body.mode === "interactive" ? "interactive" : "mechanical",
tags
});
}
res.status(202).json({ res.status(202).json({
...result, ...result,
uploadedFile: req.file.originalname uploadedResource: req.file.originalname
}); });
} catch (error) { } catch (error) {
res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown upload ingest error" }); res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown upload ingest error" });
} finally { } finally {
if (tempPath) { if (tempFilePath) {
await unlink(tempPath).catch(() => undefined); await unlink(tempFilePath).catch(() => undefined);
}
if (extractDirPath) {
await rm(extractDirPath, { recursive: true, force: true }).catch(() => undefined);
} }
} }
}); });