Add local folder upload via zip to playground ingest

This commit is contained in:
Paco POR-CORREO 2026-04-06 22:51:43 +02:00
parent 93a5aee6cb
commit 8a2eceb877
6 changed files with 165 additions and 26 deletions

View file

@ -47,3 +47,4 @@ Dar continuidad al RAG en `RAG/` a partir del estado actual documentado.
- Limpieza ejecutada exitosamente sobre el `scope` del código fuente antiguo (`RAG/src`).
- Reingesta del directorio `RAG/src` con el código actualizado.
- Documento de seguimiento `RAG/docs/TASK_LIMPIEZA.md` y documentacion API `RAG/docs/API_RAG.md` actualizados.
- Implementacion de ingesta directa de carpetas locales desde el playground: el navegador respeta `.gitignore`, empaqueta la carpeta en un `.zip` en memoria y el backend usa `adm-zip` para extraerla de forma segura en un directorio temporal antes de la ingesta.

21
RAG/package-lock.json generated
View file

@ -9,6 +9,7 @@
"version": "0.1.0",
"dependencies": {
"@qdrant/js-client-rest": "^1.15.0",
"adm-zip": "^0.5.17",
"dotenv": "^16.4.5",
"express": "^4.21.2",
"multer": "^2.0.0",
@ -16,6 +17,7 @@
"pdf-parse": "^1.1.1"
},
"devDependencies": {
"@types/adm-zip": "^0.5.8",
"@types/express": "^5.0.1",
"@types/multer": "^1.4.12",
"@types/node": "^22.15.3",
@ -493,6 +495,16 @@
"pnpm": ">=8"
}
},
"node_modules/@types/adm-zip": {
"version": "0.5.8",
"resolved": "https://registry.npmjs.org/@types/adm-zip/-/adm-zip-0.5.8.tgz",
"integrity": "sha512-RVVH7QvZYbN+ihqZ4kX/dMiowf6o+Jk1fNwiSdx0NahBJLU787zkULhGhJM8mf/obmLGmgdMM0bXsQTmyfbR7Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/body-parser": {
"version": "1.19.6",
"resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz",
@ -645,6 +657,15 @@
"node": ">= 0.6"
}
},
"node_modules/adm-zip": {
"version": "0.5.17",
"resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz",
"integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==",
"license": "MIT",
"engines": {
"node": ">=12.0"
}
},
"node_modules/agentkeepalive": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",

View file

@ -11,6 +11,7 @@
},
"dependencies": {
"@qdrant/js-client-rest": "^1.15.0",
"adm-zip": "^0.5.17",
"dotenv": "^16.4.5",
"express": "^4.21.2",
"multer": "^2.0.0",
@ -18,6 +19,7 @@
"pdf-parse": "^1.1.1"
},
"devDependencies": {
"@types/adm-zip": "^0.5.8",
"@types/express": "^5.0.1",
"@types/multer": "^1.4.12",
"@types/node": "^22.15.3",

View file

@ -31,6 +31,10 @@ const ingestSourceIdWrapper = document.getElementById("ingestSourceIdWrapper");
const ingestSourceId = document.getElementById("ingestSourceId");
const ingestSourceRef = document.getElementById("ingestSourceRef");
const ingestUploadFile = document.getElementById("ingestUploadFile");
const ingestUploadFolder = document.getElementById("ingestUploadFolder");
const btnUploadFile = document.getElementById("btnUploadFile");
const btnUploadFolder = document.getElementById("btnUploadFolder");
const uploadStatusText = document.getElementById("uploadStatusText");
const ingestMode = document.getElementById("ingestMode");
const ingestTags = document.getElementById("ingestTags");
const ingestModeHint = document.getElementById("ingestModeHint");
@ -63,6 +67,8 @@ let chatHistory = [];
let availableScopes = [];
let lastInteraction = null;
let currentUploadType = null; // 'file' o 'folder'
/**
 * Serialises `value` to JSON text indented with two spaces.
 *
 * @param value Any value accepted by `JSON.stringify`.
 * @returns The indented JSON text, or `undefined` when `JSON.stringify`
 *          yields no output for the value (e.g. `undefined` itself).
 */
function format(value) {
  const indentWidth = 2;
  const serialised = JSON.stringify(value, null, indentWidth);
  return serialised;
}
@ -77,17 +83,29 @@ function buildScopeLabel(scope) {
}
function updateIngestUiState() {
const hasUpload = Boolean(ingestUploadFile.files && ingestUploadFile.files[0]);
const hasFile = Boolean(ingestUploadFile.files && ingestUploadFile.files.length > 0);
const hasFolder = Boolean(ingestUploadFolder.files && ingestUploadFolder.files.length > 0);
const hasUpload = hasFile || hasFolder;
ingestSourceType.value = hasUpload ? "file" : ingestSourceType.value;
ingestSourceType.disabled = hasUpload;
ingestSourceRef.disabled = hasUpload;
ingestSourceIdWrapper.style.display = ingestScopeMode.value === "custom" ? "block" : "none";
if (hasUpload) {
ingestModeHint.textContent = `Upload directo activo: se ingerira el archivo local "${ingestUploadFile.files[0].name}" y se ignorara la ruta manual.`;
if (hasFile) {
uploadStatusText.textContent = `Archivo seleccionado: ${ingestUploadFile.files[0].name}`;
ingestModeHint.textContent = `Upload directo activo: se ingerira el archivo local "${ingestUploadFile.files[0].name}" y se ignorara la ruta remota.`;
ingestModeHint.classList.add("strong");
} else if (hasFolder) {
// Al seleccionar carpeta mostramos el nombre del primer archivo padre y cuantos ficheros hay
const firstPath = ingestUploadFolder.files[0].webkitRelativePath || "";
const folderName = firstPath.split('/')[0] || "Carpeta";
uploadStatusText.textContent = `Carpeta seleccionada: ${folderName} (${ingestUploadFolder.files.length} archivos totales, se filtraran ignorados)`;
ingestModeHint.textContent = `Upload directo activo: se comprimira y subira la carpeta local "${folderName}" y se ignorara la ruta remota.`;
ingestModeHint.classList.add("strong");
} else {
ingestModeHint.textContent = "Si seleccionas un archivo local, el playground usara upload directo y podras aislarlo con un `sourceId` propio para no mezclarlo con otros scopes.";
uploadStatusText.textContent = "Ningun elemento seleccionado";
ingestModeHint.textContent = "Si seleccionas un archivo o carpeta local, el playground lo subira directamente y podras aislarlo con un `sourceId` propio.";
ingestModeHint.classList.remove("strong");
}
}
@ -285,7 +303,20 @@ cleanupScopeSelect.addEventListener("change", () => {
applySelectedCleanupScope(scope);
});
// "Single file" button: makes the file input the active upload source and
// opens its picker. The folder input is cleared first so that only one upload
// source is ever selected at a time.
btnUploadFile.addEventListener("click", () => {
  ingestUploadFolder.value = "";
  // Assigning `.value` from script does not fire "change", so notify the
  // listeners explicitly. Without this, cancelling the picker would leave the
  // status text and disabled fields describing the selection just cleared.
  ingestUploadFolder.dispatchEvent(new Event("change"));
  currentUploadType = 'file';
  ingestUploadFile.click();
});
// "Local folder" button: mirror image of the handler above — clears the
// single-file input, announces the change, then opens the directory picker.
btnUploadFolder.addEventListener("click", () => {
  ingestUploadFile.value = "";
  // Same reason as above: a programmatic reset is silent unless we emit "change".
  ingestUploadFile.dispatchEvent(new Event("change"));
  currentUploadType = 'folder';
  ingestUploadFolder.click();
});
ingestUploadFile.addEventListener("change", updateIngestUiState);
ingestUploadFolder.addEventListener("change", updateIngestUiState);
ingestScopeMode.addEventListener("change", updateIngestUiState);
scopeEditMode.addEventListener("change", updateScopeEditState);
@ -304,7 +335,7 @@ ingestButton.addEventListener("click", async () => {
try {
let data;
if (ingestUploadFile.files && ingestUploadFile.files[0]) {
if (currentUploadType === 'file' && ingestUploadFile.files && ingestUploadFile.files[0]) {
const formData = new FormData();
formData.append("file", ingestUploadFile.files[0]);
formData.append("mode", ingestMode.value);
@ -318,9 +349,60 @@ ingestButton.addEventListener("click", async () => {
body: formData
});
data = await response.json();
if (!response.ok) {
throw new Error(data.error || `HTTP ${response.status}`);
if (!response.ok) throw new Error(data.error || `HTTP ${response.status}`);
} else if (currentUploadType === 'folder' && ingestUploadFolder.files && ingestUploadFolder.files.length > 0) {
ingestResult.textContent = "Empaquetando carpeta local (esto puede tardar unos segundos)...";
const zip = new JSZip();
const ig = ignore();
// Buscar .gitignore en la raiz
const gitignoreFile = Array.from(ingestUploadFolder.files).find(f => f.webkitRelativePath.match(/^[^\/]+\/\.gitignore$/));
if (gitignoreFile) {
const content = await gitignoreFile.text();
ig.add(content);
}
// Reglas hardcodeadas de seguridad
ig.add(['node_modules/', '.git/', '.venv/', 'dist/', 'build/']);
let addedCount = 0;
for (const file of ingestUploadFolder.files) {
// webkitRelativePath format: "FolderName/path/to/file.ext"
// Le quitamos el primer segmento (FolderName) para validar con ignore correctamente
const relativePath = file.webkitRelativePath.split('/').slice(1).join('/');
if (relativePath && !ig.ignores(relativePath)) {
zip.file(relativePath, file);
addedCount++;
}
}
if (addedCount === 0) {
throw new Error("La carpeta esta vacia o todos sus archivos fueron ignorados (.gitignore, node_modules, etc).");
}
ingestResult.textContent = `Subiendo paquete comprimido con ${addedCount} archivos...`;
const zipBlob = await zip.generateAsync({ type: "blob", compression: "STORE" });
const folderName = ingestUploadFolder.files[0].webkitRelativePath.split('/')[0] || "upload";
const formData = new FormData();
formData.append("file", zipBlob, `${folderName}.zip`);
formData.append("isZipFolder", "true");
formData.append("mode", ingestMode.value);
formData.append("tags", splitTags(ingestTags.value).join(","));
if (ingestScopeMode.value === "custom" && ingestSourceId.value.trim()) {
formData.append("sourceId", ingestSourceId.value.trim());
}
const response = await fetch("/ingest/upload", {
method: "POST",
body: formData
});
data = await response.json();
if (!response.ok) throw new Error(data.error || `HTTP ${response.status}`);
} else {
data = await request("/ingest", {
sourceId: ingestScopeMode.value === "custom" ? (ingestSourceId.value.trim() || undefined) : undefined,

View file

@ -5,6 +5,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>RAG Playground</title>
<link rel="stylesheet" href="/playground/styles.css" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/ignore/5.3.1/ignore.min.js"></script>
</head>
<body>
<main class="layout">
@ -52,11 +54,17 @@
<option value="file">file</option>
</select>
</label>
<label>Ruta de la fuente
<label>Ruta de la fuente (remota en servidor)
<input id="ingestSourceRef" value="/home/pancho/Documentos/Empresa/Desarrollo/IA/docs" />
</label>
<label>Archivo local a subir
<input id="ingestUploadFile" type="file" accept=".pdf,.md,.txt,.ts,.tsx,.js,.jsx,.mjs,.cjs,.py,.json,.yml,.yaml" />
<label>Subida directa (sobreescribe la ruta de la fuente)
<div style="display: flex; gap: 8px; margin-top: 4px;">
<button id="btnUploadFile" class="secondary" style="flex: 1;">Archivo suelto</button>
<button id="btnUploadFolder" class="secondary" style="flex: 1;">Carpeta local</button>
</div>
<input id="ingestUploadFile" type="file" accept=".pdf,.md,.txt,.ts,.tsx,.js,.jsx,.mjs,.cjs,.py,.json,.yml,.yaml" style="display: none;" />
<input id="ingestUploadFolder" type="file" webkitdirectory directory multiple style="display: none;" />
<span id="uploadStatusText" style="display: block; margin-top: 8px; font-size: 13px; color: var(--accent);">Ningún elemento seleccionado</span>
</label>
<label>Modo de ingesta
<select id="ingestMode">

View file

@ -1,10 +1,11 @@
import express from "express";
import multer from "multer";
import type { Request } from "express";
import { writeFile, unlink } from "node:fs/promises";
import { writeFile, unlink, mkdtemp, rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import AdmZip from "adm-zip";
import { env } from "./config/env.js";
import { AnswerService } from "./modules/answer/service.js";
import { IngestService } from "./modules/ingest/service.js";
@ -150,7 +151,8 @@ export function createApp() {
});
app.post("/ingest/upload", upload.single("file"), async (req: UploadRequest, res) => {
let tempPath: string | undefined;
let tempFilePath: string | undefined;
let extractDirPath: string | undefined;
try {
if (!req.file) {
@ -158,32 +160,55 @@ export function createApp() {
return;
}
const tempDir = await os.tmpdir();
tempPath = path.join(tempDir, `${Date.now()}-${req.file.originalname}`);
await writeFile(tempPath, req.file.buffer);
const isZipFolder = req.body.isZipFolder === "true";
const tempDirBase = await os.tmpdir();
tempFilePath = path.join(tempDirBase, `${Date.now()}-${req.file.originalname}`);
await writeFile(tempFilePath, req.file.buffer);
const tags = typeof req.body.tags === "string"
? req.body.tags.split(",").map((entry: string) => entry.trim()).filter(Boolean)
: [];
const result = await ingestService.ingest({
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "file",
sourceRef: req.file.originalname,
readPath: tempPath,
mode: req.body.mode === "interactive" ? "interactive" : "mechanical",
tags
});
let result;
if (isZipFolder) {
// Logica para carpetas subidas como ZIP
extractDirPath = await mkdtemp(path.join(tempDirBase, "rag-upload-"));
const zip = new AdmZip(tempFilePath);
zip.extractAllTo(extractDirPath, true);
result = await ingestService.ingest({
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "folder",
sourceRef: req.file.originalname.replace(/\.zip$/i, ""), // Usamos el nombre original sin .zip como ref
readPath: extractDirPath,
mode: req.body.mode === "interactive" ? "interactive" : "mechanical",
tags
});
} else {
// Logica para archivos sueltos
result = await ingestService.ingest({
sourceId: req.body.sourceId ? String(req.body.sourceId) : undefined,
sourceType: "file",
sourceRef: req.file.originalname,
readPath: tempFilePath,
mode: req.body.mode === "interactive" ? "interactive" : "mechanical",
tags
});
}
res.status(202).json({
...result,
uploadedFile: req.file.originalname
uploadedResource: req.file.originalname
});
} catch (error) {
res.status(500).json({ ok: false, error: error instanceof Error ? error.message : "Unknown upload ingest error" });
} finally {
if (tempPath) {
await unlink(tempPath).catch(() => undefined);
if (tempFilePath) {
await unlink(tempFilePath).catch(() => undefined);
}
if (extractDirPath) {
await rm(extractDirPath, { recursive: true, force: true }).catch(() => undefined);
}
}
});