Add bucketing: run retrieval, ranking, and curation in parallel buckets
All checks were successful
Recommender Build and Deploy (internal) / Build Recommender Image (push) Successful in 5m42s
Recommender Build and Deploy (internal) / Deploy Recommender (internal) (push) Successful in 39s

This commit is contained in:
2026-03-31 16:42:16 -03:00
parent 77757ace5e
commit be2d8d70cb
2 changed files with 84 additions and 20 deletions

View File

@@ -5,7 +5,7 @@ import { runInterpreter } from '../agents/interpreter.js';
import { runRetrieval } from '../agents/retrieval.js';
import { runRanking } from '../agents/ranking.js';
import { runCurator } from '../agents/curator.js';
import type { CuratorOutput, MediaType, SSEEvent } from '../types/agents.js';
import type { CuratorOutput, MediaType, RankingOutput, RetrievalCandidate, SSEEvent } from '../types/agents.js';
import { generateTitle } from '../agents/titleGenerator.js';
/* -- Agent pipeline --
@@ -17,6 +17,29 @@ import { generateTitle } from '../agents/titleGenerator.js';
type RecommendationRecord = typeof recommendations.$inferSelect;
/**
 * Maps an item count to the number of buckets the work is split into.
 *
 * Counts up to 50 use one bucket, up to 100 two, up to 150 three, and
 * anything else four.
 *
 * @param count - Number of items to be processed.
 * @returns A bucket count between 1 and 4.
 */
function getBucketCount(count: number): number {
  // Inclusive upper bound for 1, 2 and 3 buckets, in that order.
  const upperBounds = [50, 100, 150];
  const position = upperBounds.findIndex((bound) => count <= bound);
  // No bound matched: fall through to the maximum bucket count.
  return position === -1 ? upperBounds.length + 1 : position + 1;
}
/**
 * Removes candidates whose title duplicates an earlier one.
 *
 * Titles are compared case-insensitively and with whitespace normalised
 * (trimmed, internal runs collapsed to a single space), so "Dune" and
 * " dune " count as the same title. The first occurrence is kept and the
 * input order is preserved. Candidate objects are returned as-is; only the
 * comparison key is normalised.
 *
 * @param candidates - Candidates to filter; not mutated.
 * @returns A new array holding the first candidate seen for each distinct title.
 */
function deduplicateCandidates(candidates: RetrievalCandidate[]): RetrievalCandidate[] {
  const seen = new Set<string>();
  return candidates.filter((c) => {
    // Normalise whitespace as well as case: stray spaces should not make
    // two otherwise identical titles look distinct.
    const key = c.title.trim().replace(/\s+/g, ' ').toLowerCase();
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
/**
 * Splits `items` into at most `n` contiguous buckets, preserving order.
 *
 * Every bucket except possibly the last holds `ceil(items.length / n)`
 * items. Empty buckets are never returned, so the result can contain fewer
 * than `n` buckets (for example 5 items with `n = 4` gives sizes 2, 2, 1).
 *
 * `n` is clamped to a whole number of at least 1. A zero, negative,
 * fractional or non-finite `n` therefore never drops items; such values
 * fall back to the nearest valid bucket count (one bucket for anything
 * below 1 or non-finite).
 *
 * @param items - Items to distribute; not mutated.
 * @param n - Desired maximum number of buckets.
 * @returns The non-empty buckets, in order. An empty input yields `[]`.
 */
function splitIntoBuckets<T>(items: T[], n: number): T[][] {
  if (items.length === 0) return [];

  // Guard against bucket counts that would otherwise lose items
  // (0, negatives, fractions) or request an enormous array (Infinity).
  const bucketCount = Number.isFinite(n) ? Math.max(1, Math.floor(n)) : 1;
  const size = Math.ceil(items.length / bucketCount);

  const buckets: T[][] = [];
  for (let start = 0; start < items.length; start += size) {
    buckets.push(items.slice(start, start + size));
  }
  return buckets;
}
function log(recId: string, msg: string, data?: unknown) {
const ts = new Date().toISOString();
if (data !== undefined) {
@@ -68,24 +91,45 @@ export async function runPipeline(
});
sseWrite({ stage: 'interpreter', status: 'done', data: interpreterOutput });
// --- Retrieval ---
// --- Retrieval (bucketed) ---
currentStage = 'retrieval';
log(rec.id, 'Retrieval: start');
sseWrite({ stage: 'retrieval', status: 'start' });
const t1 = Date.now();
const retrievalOutput = await runRetrieval(interpreterOutput, rec.brainstorm_count, mediaType, useWebSearch);
log(rec.id, `Retrieval: done (${Date.now() - t1}ms) — ${retrievalOutput.candidates.length} candidates`, {
titles: retrievalOutput.candidates.map((c) => c.title),
const retrievalBucketCount = getBucketCount(rec.brainstorm_count);
const perBucketCount = Math.ceil(rec.brainstorm_count / retrievalBucketCount);
const retrievalBuckets = await Promise.all(
Array.from({ length: retrievalBucketCount }, () =>
runRetrieval(interpreterOutput, perBucketCount, mediaType, useWebSearch)
)
);
const allCandidates = retrievalBuckets.flatMap((r) => r.candidates);
const dedupedCandidates = deduplicateCandidates(allCandidates);
const retrievalOutput = { candidates: dedupedCandidates };
log(rec.id, `Retrieval: done (${Date.now() - t1}ms) — ${dedupedCandidates.length} candidates (${retrievalBucketCount} buckets, ${allCandidates.length} before dedup)`, {
titles: dedupedCandidates.map((c) => c.title),
});
sseWrite({ stage: 'retrieval', status: 'done', data: retrievalOutput });
// --- Ranking ---
// --- Ranking (bucketed) ---
currentStage = 'ranking';
log(rec.id, 'Ranking: start');
sseWrite({ stage: 'ranking', status: 'start' });
const t2 = Date.now();
const rankingOutput = await runRanking(interpreterOutput, retrievalOutput, mediaType);
log(rec.id, `Ranking: done (${Date.now() - t2}ms)`, {
const rankBucketCount = getBucketCount(dedupedCandidates.length);
const candidateBuckets = splitIntoBuckets(dedupedCandidates, rankBucketCount);
const rankingBuckets = await Promise.all(
candidateBuckets.map((bucket) =>
runRanking(interpreterOutput, { candidates: bucket }, mediaType)
)
);
const rankingOutput: RankingOutput = {
definitely_like: rankingBuckets.flatMap((r) => r.definitely_like),
might_like: rankingBuckets.flatMap((r) => r.might_like),
questionable: rankingBuckets.flatMap((r) => r.questionable),
will_not_like: rankingBuckets.flatMap((r) => r.will_not_like),
};
log(rec.id, `Ranking: done (${Date.now() - t2}ms) — ${rankBucketCount} buckets`, {
definitely_like: rankingOutput.definitely_like.length,
might_like: rankingOutput.might_like.length,
questionable: rankingOutput.questionable.length,
@@ -93,13 +137,33 @@ export async function runPipeline(
});
sseWrite({ stage: 'ranking', status: 'done', data: rankingOutput });
// --- Curator ---
// --- Curator (bucketed) ---
currentStage = 'curator';
log(rec.id, 'Curator: start');
sseWrite({ stage: 'curator', status: 'start' });
const t3 = Date.now();
const curatorOutput = await runCurator(rankingOutput, interpreterOutput, mediaType, useWebSearch);
log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} items curated`);
type CategorizedItem = { title: string; category: keyof RankingOutput };
const categorizedItems: CategorizedItem[] = [
...rankingOutput.definitely_like.map((t) => ({ title: t, category: 'definitely_like' as const })),
...rankingOutput.might_like.map((t) => ({ title: t, category: 'might_like' as const })),
...rankingOutput.questionable.map((t) => ({ title: t, category: 'questionable' as const })),
...rankingOutput.will_not_like.map((t) => ({ title: t, category: 'will_not_like' as const })),
];
const curatorBucketCount = getBucketCount(categorizedItems.length);
const curatorItemBuckets = splitIntoBuckets(categorizedItems, curatorBucketCount);
const curatorBucketRankings: RankingOutput[] = curatorItemBuckets.map((bucket) => ({
definitely_like: bucket.filter((i) => i.category === 'definitely_like').map((i) => i.title),
might_like: bucket.filter((i) => i.category === 'might_like').map((i) => i.title),
questionable: bucket.filter((i) => i.category === 'questionable').map((i) => i.title),
will_not_like: bucket.filter((i) => i.category === 'will_not_like').map((i) => i.title),
}));
const curatorBucketOutputs = await Promise.all(
curatorBucketRankings.map((ranking) =>
runCurator(ranking, interpreterOutput, mediaType, useWebSearch)
)
);
const curatorOutput = curatorBucketOutputs.flat();
log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} items curated (${curatorBucketCount} buckets)`);
sseWrite({ stage: 'curator', status: 'done', data: curatorOutput });
// Generate AI title