import type { EmbeddingProvider } from "../embeddings/provider.js";
import type { VectorStoreClient } from "../vectorstore/client.js";
import type { ChunkMode, RetrieveIntent, RetrieveResponse, RetrieveScope } from "../../shared/types/rag.js";

/** A single retrieved chunk as carried in a `RetrieveResponse`. */
type RetrievedItem = RetrieveResponse["items"][number];

/**
 * Vector type produced by the embedding provider. Derived from the provider's
 * own `embed` signature so this file never disagrees with it.
 */
type EmbeddingVector = Awaited<ReturnType<EmbeddingProvider["embed"]>>[number];

/** A keyword pattern and the score adjustment applied when it matches. */
interface BoostRule {
  readonly pattern: RegExp;
  readonly boost: number;
}

/** A boost that applies only when both the query and the item text match. */
interface AlignmentRule {
  readonly query: RegExp;
  readonly source: RegExp;
  readonly boost: number;
}

/** Extra sub-queries appended when the user query matches `trigger`. */
interface QueryExpansion {
  readonly trigger: RegExp;
  readonly suffixes: readonly string[];
}

/** Number of hits requested from the vector store for each sub-query. */
const SEARCH_LIMIT_PER_QUERY = 6;
/** Maximum number of items returned for a specific (non-bootstrap) retrieval. */
const SPECIFIC_RESULT_LIMIT = 8;
/** Maximum number of items returned for a bootstrap retrieval. */
const BOOTSTRAP_RESULT_LIMIT = 12;
/** Number of items requested when falling back to browsing a scope. */
const BROWSE_LIMIT = 12;

/** Unicode combining diacritical marks left behind by NFD normalization. */
const COMBINING_MARKS = /[\u0300-\u036f]/g;

/** Content keywords that mark a chunk as critical enough to list in the summary. */
const CRITICAL_CONTENT = /critico|critical|importante|pendiente/i;

// Rules are evaluated in declaration order and their boosts are summed.
const CONTENT_BOOST_RULES: readonly BoostRule[] = [
  { pattern: /regla|importante|critico|critica/i, boost: 0.08 },
  { pattern: /pendiente|backlog|task|objetivo/i, boost: 0.06 },
  { pattern: /workspace|proyecto|documentacion/i, boost: 0.04 },
  {
    pattern: /objetivo|objetivos principales|principios del sistema|resultado esperado|vision del sistema/i,
    boost: 0.16
  },
  // Negative boost: these bold markdown field labels lower the chunk's rank.
  { pattern: /\*\*proyecto:\*\*|\*\*modulo:\*\*|\*\*ultima actualizacion:\*\*/i, boost: -0.18 }
];

const TITLE_BOOST_RULES: readonly BoostRule[] = [
  { pattern: /pendientes_generales/i, boost: 0.1 },
  // NOTE(review): this also matches "ids" inside longer words; presumably meant for ids.ts — confirm.
  { pattern: /ids/i, boost: 0.12 },
  { pattern: /service|client|chunking|provider|server|app/i, boost: 0.05 },
  { pattern: /readme|indice_documentacion|historial_sesiones|task/i, boost: 0.03 }
];

const ALIGNMENT_RULES: readonly AlignmentRule[] = [
  {
    query: /pendiente|backlog|por hacer|siguiente/i,
    source: /pendiente|backlog|lineas de trabajo|proximos pasos/i,
    boost: 0.18
  },
  {
    query: /regla|norma|protocolo|instruccion/i,
    source: /regla|norma|protocolo|instruccion/i,
    boost: 0.18
  },
  { query: /sesion|historial|registro/i, source: /sesion|historial|registro/i, boost: 0.16 },
  { query: /indice|documentacion|mapa/i, source: /indice|documentacion|mapa/i, boost: 0.14 },
  {
    query: /funcion|metodo|clase|endpoint|service|client|chunk|codigo|linea|source_id|document_id|qdrant|embedding/i,
    source: /function|class|endpoint|service|client|chunk|codigo|source_id|document_id|qdrant|embedding|def\s+/i,
    boost: 0.2
  },
  {
    query: /source_id|document_id|chunk_id|ids/i,
    source: /buildsourceid|builddocumentid|buildchunkid|ids\.ts|source_id|document_id|chunk_id/i,
    boost: 0.28
  },
  { query: /source_id/i, source: /buildsourceid|source_id/i, boost: 0.22 },
  { query: /document_id/i, source: /builddocumentid|document_id/i, boost: 0.22 },
  { query: /chunk_id/i, source: /buildchunkid|chunk_id/i, boost: 0.22 }
];

const SPECIFIC_QUERY_EXPANSIONS: readonly QueryExpansion[] = [
  {
    trigger: /pendiente|backlog|por hacer|siguiente/i,
    suffixes: [
      "pendientes backlog proximos pasos",
      "lineas de trabajo prioritarias",
      "sistema basico RAG estructura MCP Retell"
    ]
  },
  { trigger: /regla|norma|protocolo|instruccion/i, suffixes: ["reglas protocolo instrucciones"] },
  { trigger: /sesion|historial|registro/i, suffixes: ["historial sesiones registro"] },
  { trigger: /indice|documentacion|mapa/i, suffixes: ["indice documentacion mapa"] },
  {
    trigger: /caracteristica|como funciona|objetivo|modulo|arquitectura|stack/i,
    suffixes: [
      "sistema rag base arquitectura stack tecnico",
      "ingesta procesado salida embeddings qdrant",
      "objetivo principios casos de uso",
      "vision del sistema objetivos principales resultado esperado"
    ]
  },
  {
    trigger: /funcion|metodo|clase|endpoint|service|client|source_id|document_id|chunk|qdrant|embedding|codigo/i,
    suffixes: [
      "funcion metodo clase service client",
      "endpoint api qdrant embeddings retrieve answer ingest",
      "source_id document_id chunk_id codigo",
      "buildSourceId buildDocumentId buildChunkId ids.ts"
    ]
  }
];

/** Returns the distinct truthy values of `values`, preserving first-seen order. */
function unique(values: string[]): string[] {
  return [...new Set(values.filter(Boolean))];
}

/** Trims the query and collapses every run of whitespace into a single space. */
function normalizeQuery(value: string): string {
  return value.trim().replace(/\s+/g, " ");
}

/**
 * Removes diacritics ("sesión" -> "sesion") so the unaccented keyword patterns
 * in this file also match correctly spelled Spanish text.
 */
function foldAccents(value: string): string {
  return value.normalize("NFD").replace(COMBINING_MARKS, "");
}

/** Sums the boosts of every rule whose pattern matches the accent-folded `text`. */
function sumBoosts(rules: readonly BoostRule[], text: string): number {
  const folded = foldAccents(text);
  let total = 0;
  for (const rule of rules) {
    if (rule.pattern.test(folded)) {
      total += rule.boost;
    }
  }
  return total;
}

/** Score adjustment derived from keywords found in a chunk's content. */
function scoreBoostFromContent(content: string): number {
  return sumBoosts(CONTENT_BOOST_RULES, content);
}

/** Score adjustment derived from keywords found in a chunk's title. */
function scoreBoostFromTitle(title: string): number {
  return sumBoosts(TITLE_BOOST_RULES, title);
}

/**
 * Score adjustment granted when the query and the item (title plus content)
 * mention the same topic, according to `ALIGNMENT_RULES`.
 */
function scoreBoostFromQueryAlignment(query: string, item: RetrieveResponse["items"][number]): number {
  const source = foldAccents(`${item.title} ${item.content}`).toLowerCase();
  const normalizedQuery = foldAccents(query).toLowerCase();
  let boost = 0;
  for (const rule of ALIGNMENT_RULES) {
    if (rule.query.test(normalizedQuery) && rule.source.test(source)) {
      boost += rule.boost;
    }
  }
  return boost;
}

/**
 * Returns a new array with `items` ordered by descending boosted score
 * (vector score + title boost + content boost + query-alignment boost).
 * The input array is not mutated.
 */
function rankItems(query: string, items: RetrieveResponse["items"]): RetrieveResponse["items"] {
  // Score each item once up front instead of re-running every regex inside the comparator.
  const scored = items.map((item) => ({
    item,
    total:
      item.score +
      scoreBoostFromTitle(item.title) +
      scoreBoostFromContent(item.content) +
      scoreBoostFromQueryAlignment(query, item)
  }));
  scored.sort((left, right) => right.total - left.total);
  return scored.map((entry) => entry.item);
}

/**
 * Builds the sub-queries for a bootstrap ("initial map") retrieval. An empty
 * query yields a fixed set of generic exploration prompts; otherwise the query
 * is combined with several overview-oriented suffixes.
 */
function buildBootstrapQueries(query: string): string[] {
  const cleaned = normalizeQuery(query);
  if (!cleaned) {
    return [
      "dame un mapa inicial del dominio y sus ideas principales",
      "documentacion principal y estructura general",
      "temas base y puntos importantes a tener presentes",
      "referencias principales para profundizar despues"
    ];
  }
  return unique([
    cleaned,
    `${cleaned} mapa general`,
    `${cleaned} documentacion principal`,
    `${cleaned} pendientes y lineas de trabajo`,
    `${cleaned} reglas y estructura del workspace`
  ]);
}

/**
 * Builds the sub-queries for a specific retrieval: the normalized query itself
 * plus topic-dependent expansions from `SPECIFIC_QUERY_EXPANSIONS`. An empty
 * query yields an empty list.
 */
function buildSpecificQueries(query: string): string[] {
  const cleaned = normalizeQuery(query);
  // Triggers are tested on the accent-folded text; the emitted queries keep the user's spelling.
  const folded = foldAccents(cleaned);
  const queries = [cleaned];
  for (const expansion of SPECIFIC_QUERY_EXPANSIONS) {
    if (expansion.trigger.test(folded)) {
      for (const suffix of expansion.suffixes) {
        queries.push(`${cleaned} ${suffix}`);
      }
    }
  }
  return unique(queries);
}

/** Human-readable (Spanish) summary for a specific retrieval. */
function buildSpecificSummary(topics: string[], itemsCount: number): string {
  if (itemsCount === 0) {
    return "No se recupero contexto relevante para la consulta.";
  }
  const topicText = topics.slice(0, 4).join(", ") || "tema consultado";
  return `Se recuperaron ${itemsCount} fragmentos relevantes sobre: ${topicText}.`;
}

/** Human-readable (Spanish) summary for a bootstrap retrieval. */
function buildBootstrapSummary(
  topics: string[],
  criticalPoints: string[],
  followUpRefs: string[],
  itemsCount: number
): string {
  if (itemsCount === 0) {
    return "No se recupero contexto suficiente para construir el mapa inicial del dominio.";
  }
  const themeText = topics.slice(0, 4).join(", ") || "los documentos principales";
  const criticalText = criticalPoints.slice(0, 3).join(", ") || "sin puntos criticos destacados";
  const refsText = followUpRefs.slice(0, 4).join(", ") || "no disponibles";
  return `Mapa inicial construido con ${itemsCount} fragmentos. Temas base: ${themeText}. Puntos a tener presentes: ${criticalText}. Referencias principales para profundizar: ${refsText}.`;
}

/**
 * Embeds a single query string.
 *
 * @throws Error when the provider returns no vector for the query.
 */
async function embedQuery(provider: EmbeddingProvider, query: string): Promise<EmbeddingVector> {
  const [vector] = await provider.embed([query]);
  if (vector == null) {
    // Fail here rather than handing an undefined vector to the vector store.
    throw new Error("Embedding provider returned no vector for the query.");
  }
  return vector;
}

/** Keeps `item` in `merged` unless a hit for the same chunk with an equal or higher score is already stored. */
function keepBestScore(merged: Map<string, RetrievedItem>, item: RetrievedItem): void {
  const existing = merged.get(item.chunkId);
  if (!existing || item.score > existing.score) {
    merged.set(item.chunkId, item);
  }
}

/**
 * Retrieves context chunks for a query by fanning it out into several
 * sub-queries, merging the hits per chunk and re-ranking them with keyword
 * heuristics.
 */
export class RetrieveService {
  constructor(
    private readonly embeddingProvider: EmbeddingProvider,
    private readonly vectorStore: VectorStoreClient
  ) {}

  /**
   * Runs a retrieval and assembles the response.
   *
   * @param mode Chunk mode forwarded to the vector store.
   * @param intent `"bootstrap"` selects the bootstrap path and summary; any other value uses the specific path.
   * @param query Free-text user query.
   * @param scope Optional scope forwarded to the vector store and echoed in the response.
   * @returns The ranked items plus derived topics, critical points, follow-up references and a summary.
   */
  async retrieve(
    mode: ChunkMode,
    intent: RetrieveIntent,
    query: string,
    scope?: RetrieveScope
  ): Promise<RetrieveResponse> {
    const isBootstrap = intent === "bootstrap";
    const items = isBootstrap
      ? await this.retrieveBootstrap(mode, query, scope)
      : await this.retrieveSpecific(mode, query, scope);

    const topics = unique(items.map((item) => item.title)).slice(0, 8);
    // De-duplicated like topics: several chunks of one document share a title.
    const criticalPoints = unique(
      items.filter((item) => CRITICAL_CONTENT.test(foldAccents(item.content))).map((item) => item.title)
    ).slice(0, 5);
    const followUpRefs = unique(items.map((item) => item.documentId)).slice(0, 8);

    const summary = isBootstrap
      ? buildBootstrapSummary(topics, criticalPoints, followUpRefs, items.length)
      : buildSpecificSummary(topics, items.length);

    return { mode, intent, summary, topics, criticalPoints, items, followUpRefs, scope };
  }

  /** Specific retrieval: expanded sub-queries, merged and ranked, capped at `SPECIFIC_RESULT_LIMIT`. */
  private async retrieveSpecific(
    mode: ChunkMode,
    query: string,
    scope?: RetrieveScope
  ): Promise<RetrieveResponse["items"]> {
    const merged = await this.searchAndMerge(buildSpecificQueries(query), mode, scope);
    return rankItems(query, [...merged.values()]).slice(0, SPECIFIC_RESULT_LIMIT);
  }

  /**
   * Bootstrap retrieval: overview sub-queries, merged and ranked, capped at
   * `BOOTSTRAP_RESULT_LIMIT`. When the searches return nothing and the scope
   * carries a source id, source ref or at least one tag, falls back to
   * browsing that scope.
   */
  private async retrieveBootstrap(
    mode: ChunkMode,
    query: string,
    scope?: RetrieveScope
  ): Promise<RetrieveResponse["items"]> {
    const merged = await this.searchAndMerge(buildBootstrapQueries(query), mode, scope);

    const hasScopeFilter =
      scope != null &&
      (Boolean(scope.sourceId) || Boolean(scope.sourceRef) || (scope.tags?.length ?? 0) > 0);
    if (merged.size === 0 && scope && hasScopeFilter) {
      const explored = await this.vectorStore.browseScope(BROWSE_LIMIT, mode, scope);
      for (const item of explored) {
        merged.set(item.chunkId, item);
      }
    }

    return rankItems(query, [...merged.values()]).slice(0, BOOTSTRAP_RESULT_LIMIT);
  }

  /**
   * Embeds and searches each sub-query in turn, keeping the highest-scoring
   * hit per chunk id. Queries run one after another, in the given order.
   */
  private async searchAndMerge(
    queries: readonly string[],
    mode: ChunkMode,
    scope?: RetrieveScope
  ): Promise<Map<string, RetrievedItem>> {
    // NOTE(review): assumes chunkId is a string and search hits are RetrieveResponse items — confirm against the shared types.
    const merged = new Map<string, RetrievedItem>();
    for (const subquery of queries) {
      const queryVector = await embedQuery(this.embeddingProvider, subquery);
      const results = await this.vectorStore.search(queryVector, SEARCH_LIMIT_PER_QUERY, mode, scope);
      for (const item of results) {
        keepBestScore(merged, item);
      }
    }
    return merged;
  }
}