adding movies & web search tool
All checks were successful
Recommender Build and Deploy (internal) / Build Recommender Image (push) Successful in 4m0s
Recommender Build and Deploy (internal) / Deploy Recommender (internal) (push) Successful in 12s

This commit is contained in:
2026-03-26 20:35:22 -03:00
parent 6fdfc3797a
commit 1437092a42
25 changed files with 450 additions and 135 deletions

View File

@@ -1,5 +1,5 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RankingOutput, CuratorOutput } from '../types/agents.js';
import type { InterpreterOutput, RankingOutput, CuratorOutput, MediaType } from '../types/agents.js';
import { z } from 'zod';
import { zodTextFormat } from 'openai/helpers/zod';
@@ -14,7 +14,11 @@ const CuratorSchema = z.object({
export async function runCurator(
ranking: RankingOutput,
interpreter: InterpreterOutput,
mediaType: MediaType = 'tv_show',
useWebSearch = false,
): Promise<CuratorOutput[]> {
const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
const allShows = [
...ranking.definitely_like.map((t) => ({ title: t, category: 'Definitely Like' as const })),
...ranking.might_like.map((t) => ({ title: t, category: 'Might Like' as const })),
@@ -32,17 +36,15 @@ export async function runCurator(
model: 'gpt-5.4',
temperature: 0.5,
service_tier: 'flex',
tools: [
{ type: 'web_search' }
],
...(useWebSearch ? { tools: [{ type: 'web_search' as const }] } : {}),
text: { format: zodTextFormat(CuratorSchema, "shows") },
instructions: `You are a TV show recommendation curator. For each show, write a concise 1-2 sentence explanation of why it was assigned to its category based on the user's preferences.
instructions: `You are a ${mediaLabel} recommendation curator. For each ${mediaLabel}, write a concise 1-2 sentence explanation of why it was assigned to its category based on the user's preferences.${useWebSearch ? '\n\nUse web search to verify details and enrich explanations with accurate information.' : ''}
Rules:
- Preserve the exact title and category as given
- Keep explanations concise (1-2 sentences max)
- Reference specific user preferences in the explanation
- Be honest — explain why "Questionable" or "Will Not Like" shows got that rating`,
- Be honest — explain why "Questionable" or "Will Not Like" ${mediaLabel}s got that rating`,
input: `User preferences summary:
Liked: ${JSON.stringify(interpreter.liked)}
Themes: ${JSON.stringify(interpreter.themes)}
@@ -50,7 +52,7 @@ Tone: ${JSON.stringify(interpreter.tone)}
Character preferences: ${JSON.stringify(interpreter.character_preferences)}
Avoid: ${JSON.stringify(interpreter.avoid)}
Shows to describe:
${mediaLabel}s to describe:
${showList}`,
});

View File

@@ -1,5 +1,5 @@
import { openai } from '../agent.js';
import type { InterpreterOutput } from '../types/agents.js';
import type { InterpreterOutput, MediaType } from '../types/agents.js';
import { z } from 'zod';
import { zodTextFormat } from 'openai/helpers/zod';
@@ -17,10 +17,12 @@ interface InterpreterInput {
liked_shows: string;
disliked_shows: string;
themes: string;
media_type: MediaType;
feedback_context?: string;
}
export async function runInterpreter(input: InterpreterInput): Promise<InterpreterOutput> {
const mediaLabel = input.media_type === 'movie' ? 'movie' : 'TV show';
const feedbackSection = input.feedback_context
? `\n\nUser Feedback Context (incorporate into preferences):\n${input.feedback_context}`
: '';
@@ -30,7 +32,7 @@ export async function runInterpreter(input: InterpreterInput): Promise<Interpret
temperature: 0.2,
service_tier: 'flex',
text: { format: zodTextFormat(InterpreterSchema, "preferences") },
instructions: `You are a TV show preference interpreter. Transform raw user input into structured, normalized preferences.
instructions: `You are a ${mediaLabel} preference interpreter. Transform raw user input into structured, normalized preferences.
Rules:
- Extract implicit preferences from the main prompt
@@ -39,8 +41,8 @@ Rules:
- Do NOT assume anything not stated or clearly implied
- Be specific and concrete, not vague`,
input: `Main prompt: ${input.main_prompt}
Liked shows: ${input.liked_shows || '(none)'}
Disliked shows: ${input.disliked_shows || '(none)'}
Liked ${mediaLabel}s: ${input.liked_shows || '(none)'}
Disliked ${mediaLabel}s: ${input.disliked_shows || '(none)'}
Themes and requirements: ${input.themes || '(none)'}${feedbackSection}`,
});

View File

@@ -1,5 +1,5 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RetrievalOutput, RankingOutput } from '../types/agents.js';
import type { InterpreterOutput, RetrievalOutput, RankingOutput, MediaType } from '../types/agents.js';
import { z } from 'zod';
import { zodTextFormat } from 'openai/helpers/zod';
@@ -13,7 +13,10 @@ const RankingSchema = z.object({
export async function runRanking(
interpreter: InterpreterOutput,
retrieval: RetrievalOutput,
mediaType: MediaType = 'tv_show',
): Promise<RankingOutput> {
const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
// Phase 1: Pre-filter — remove avoidance violations
const avoidList = interpreter.avoid.map((a) => a.toLowerCase());
const filtered = retrieval.candidates.filter((c) => {
@@ -43,7 +46,7 @@ export async function runRanking(
temperature: 0.2,
service_tier: 'flex',
text: { format: zodTextFormat(RankingSchema, "ranking") },
instructions: `You are a TV show ranking critic. Assign each show to exactly one of four confidence buckets based on how well it matches the user's preferences.
instructions: `You are a ${mediaLabel} ranking critic. Assign each ${mediaLabel} to exactly one of four confidence buckets based on how well it matches the user's preferences.
Buckets:
- "definitely_like": Near-perfect match to all preferences
@@ -51,15 +54,15 @@ Buckets:
- "questionable": Partial alignment, some aspects don't match
- "will_not_like": Likely mismatch, conflicts with preferences or avoidance criteria
Every show in the input must appear in exactly one bucket. Use the title exactly as given.`,
Every ${mediaLabel} in the input must appear in exactly one bucket. Use the title exactly as given.`,
input: `User preferences:
Liked shows: ${JSON.stringify(interpreter.liked)}
Liked ${mediaLabel}s: ${JSON.stringify(interpreter.liked)}
Themes: ${JSON.stringify(interpreter.themes)}
Character preferences: ${JSON.stringify(interpreter.character_preferences)}
Tone: ${JSON.stringify(interpreter.tone)}
Avoid: ${JSON.stringify(interpreter.avoid)}
Rank these shows:
Rank these ${mediaLabel}s:
${chunkTitles}`,
});

View File

@@ -1,5 +1,5 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RetrievalOutput } from '../types/agents.js';
import type { InterpreterOutput, RetrievalOutput, MediaType } from '../types/agents.js';
import { z } from 'zod';
import { zodTextFormat } from 'openai/helpers/zod';
@@ -10,33 +10,39 @@ const RetrievalSchema = z.object({
}))
});
export async function runRetrieval(input: InterpreterOutput, brainstormCount = 100): Promise<RetrievalOutput> {
export async function runRetrieval(
input: InterpreterOutput,
brainstormCount = 100,
mediaType: MediaType = 'tv_show',
useWebSearch = false,
): Promise<RetrievalOutput> {
const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
const mediaLabelPlural = mediaType === 'movie' ? 'movies' : 'TV shows';
const response = await openai.responses.parse({
model: 'gpt-5.4',
temperature: 0.9,
service_tier: 'flex',
tools: [
{ type: 'web_search' }
],
...(useWebSearch ? { tools: [{ type: 'web_search' as const }] } : {}),
text: { format: zodTextFormat(RetrievalSchema, "candidates") },
instructions: `You are a TV show candidate generator. Your goal is to brainstorm a LARGE, DIVERSE pool of ${brainstormCount} TV show candidates that match the user's structured preferences.
instructions: `You are a ${mediaLabel} candidate generator. Your goal is to brainstorm a LARGE, DIVERSE pool of ${brainstormCount} ${mediaLabel} candidates that match the user's structured preferences.${useWebSearch ? '\n\nUse web search to find recent and accurate titles, including newer releases.' : ''}
Rules:
- Include both well-known and obscure shows
- Include both well-known and obscure ${mediaLabelPlural}
- Prioritize RECALL over precision — it's better to include too many than too few
- Each "reason" should briefly explain why the show matches the preferences
- Each "reason" should briefly explain why the ${mediaLabel} matches the preferences
- Avoid duplicates
- Include shows from different decades, countries, and networks
- Include ${mediaLabelPlural} from different decades, countries${mediaType === 'tv_show' ? ', and networks' : ', and directors'}
- Aim for ${brainstormCount} candidates minimum`,
input: `Structured preferences:
Liked shows: ${JSON.stringify(input.liked)}
Disliked shows: ${JSON.stringify(input.disliked)}
Liked ${mediaLabelPlural}: ${JSON.stringify(input.liked)}
Disliked ${mediaLabelPlural}: ${JSON.stringify(input.disliked)}
Themes: ${JSON.stringify(input.themes)}
Character preferences: ${JSON.stringify(input.character_preferences)}
Tone: ${JSON.stringify(input.tone)}
Avoid: ${JSON.stringify(input.avoid)}
Generate a large, diverse pool of TV show candidates.`,
Generate a large, diverse pool of ${mediaLabel} candidates.`,
});
return (response.output_parsed as RetrievalOutput) ?? { candidates: [] };

View File

@@ -1,12 +1,14 @@
import { openai } from '../agent.js';
import type { InterpreterOutput } from '../types/agents.js';
import type { InterpreterOutput, MediaType } from '../types/agents.js';
export async function generateTitle(interpreter: InterpreterOutput, mediaType: MediaType = 'tv_show'): Promise<string> {
const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
export async function generateTitle(interpreter: InterpreterOutput): Promise<string> {
const response = await openai.responses.create({
model: 'gpt-5.4-mini',
temperature: 0.7,
service_tier: 'flex',
instructions: `Generate a concise 5-8 word title for a TV show recommendation session.
instructions: `Generate a concise 5-8 word title for a ${mediaLabel} recommendation session.
Capture the essence of the user's taste — genre, tone, key themes.
Respond with ONLY the title. No quotes, no trailing punctuation.
Examples: "Dark Crime Dramas With Moral Ambiguity", "Cozy British Mysteries With Quirky Detectives"`,

View File

@@ -1,4 +1,4 @@
import { pgTable, uuid, text, jsonb, timestamp, integer, uniqueIndex } from 'drizzle-orm/pg-core';
import { pgTable, uuid, text, jsonb, timestamp, integer, uniqueIndex, boolean } from 'drizzle-orm/pg-core';
import type { CuratorOutput } from '../types/agents.js';
export const recommendations = pgTable('recommendations', {
@@ -9,6 +9,8 @@ export const recommendations = pgTable('recommendations', {
disliked_shows: text('disliked_shows').notNull().default(''),
themes: text('themes').notNull().default(''),
brainstorm_count: integer('brainstorm_count').notNull().default(100),
media_type: text('media_type').notNull().default('tv_show'),
use_web_search: boolean('use_web_search').notNull().default(false),
recommendations: jsonb('recommendations').$type<CuratorOutput[]>(),
status: text('status').notNull().default('pending'),
created_at: timestamp('created_at').defaultNow().notNull(),
@@ -18,10 +20,10 @@ export const feedback = pgTable(
'feedback',
{
id: uuid('id').defaultRandom().primaryKey(),
tv_show_name: text('tv_show_name').notNull(),
item_name: text('item_name').notNull(),
stars: integer('stars').notNull(),
feedback: text('feedback').notNull().default(''),
created_at: timestamp('created_at').defaultNow().notNull(),
},
(table) => [uniqueIndex('feedback_tv_show_name_idx').on(table.tv_show_name)],
(table) => [uniqueIndex('feedback_item_name_idx').on(table.item_name)],
);

View File

@@ -0,0 +1,32 @@
import { drizzle } from 'drizzle-orm/postgres-js';
import { migrate } from 'drizzle-orm/postgres-js/migrator';
import postgres from 'postgres';
import * as dotenv from 'dotenv';
// Migration script setup: load env files, require DATABASE_URL, and build the
// drizzle handle that the migration runner below operates on.

// Ask dotenv to read both .env.local and .env before DATABASE_URL is looked up.
dotenv.config({ path: ['.env.local', '.env'] });
const connectionString = process.env.DATABASE_URL;
// Nothing can be migrated without a connection string: report it and exit
// with a non-zero status before any client is created.
if (!connectionString) {
console.error('DATABASE_URL is not set');
process.exit(1);
}
// Cap the client at a single connection (max: 1); this client is used only
// for running migrations.
const migrationClient = postgres(connectionString, { max: 1 });
// Drizzle wrapper around the migration client; this is what migrate() receives.
const db = drizzle(migrationClient);
/**
 * Applies the migrations found in `./drizzle` using the migration client,
 * then closes that client.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * `process.exitCode` is used instead of `process.exit(1)` because
 * `process.exit` terminates the process immediately, which would skip the
 * `finally` block and leave the database connection unclosed.
 *
 * @returns A promise that settles once the connection has been closed.
 */
const runMigrations = async (): Promise<void> => {
  console.log('Running database migrations...');
  try {
    await migrate(db, { migrationsFolder: './drizzle' });
    console.log('Migrations completed successfully.');
  } catch (err) {
    console.error('Error running migrations:', err);
    // Record the failure but let `finally` run so the client is closed first.
    process.exitCode = 1;
  } finally {
    await migrationClient.end();
  }
};

// Migration errors are handled inside runMigrations; anything that still
// rejects here (e.g. closing the client failed) must not go unhandled.
runMigrations().catch((err: unknown) => {
  console.error('Unexpected failure while finishing migrations:', err);
  process.exit(1);
});

View File

@@ -5,14 +5,14 @@ import { runInterpreter } from '../agents/interpreter.js';
import { runRetrieval } from '../agents/retrieval.js';
import { runRanking } from '../agents/ranking.js';
import { runCurator } from '../agents/curator.js';
import type { CuratorOutput, SSEEvent } from '../types/agents.js';
import type { CuratorOutput, MediaType, SSEEvent } from '../types/agents.js';
import { generateTitle } from '../agents/titleGenerator.js';
/* -- Agent pipeline --
[1] Interpreter -> gets user input, transforms into structured data
[2] Retrieval -> gets shows from OpenAI (high temperature)
[3] Ranking -> ranks shows based on user input
[4] Curator -> curates shows based on user input
[2] Retrieval -> gets candidates from OpenAI (high temperature)
[3] Ranking -> ranks candidates based on user input
[4] Curator -> curates candidates based on user input
*/
type RecommendationRecord = typeof recommendations.$inferSelect;
@@ -33,8 +33,10 @@ export async function runPipeline(
): Promise<CuratorOutput[]> {
let currentStage: SSEEvent['stage'] = 'interpreter';
const startTime = Date.now();
const mediaType = (rec.media_type ?? 'tv_show') as MediaType;
const useWebSearch = rec.use_web_search ?? false;
log(rec.id, `Starting pipeline for "${rec.title}"${feedbackContext ? ' (with feedback context)' : ''}`);
log(rec.id, `Starting pipeline for "${rec.title}" [${mediaType}${useWebSearch ? ', web_search' : ''}]${feedbackContext ? ' (with feedback context)' : ''}`);
try {
// Set status to running
@@ -54,6 +56,7 @@ export async function runPipeline(
liked_shows: rec.liked_shows,
disliked_shows: rec.disliked_shows,
themes: rec.themes,
media_type: mediaType,
...(feedbackContext !== undefined ? { feedback_context: feedbackContext } : {}),
});
log(rec.id, `Interpreter: done (${Date.now() - t0}ms)`, {
@@ -70,7 +73,7 @@ export async function runPipeline(
log(rec.id, 'Retrieval: start');
sseWrite({ stage: 'retrieval', status: 'start' });
const t1 = Date.now();
const retrievalOutput = await runRetrieval(interpreterOutput, rec.brainstorm_count);
const retrievalOutput = await runRetrieval(interpreterOutput, rec.brainstorm_count, mediaType, useWebSearch);
log(rec.id, `Retrieval: done (${Date.now() - t1}ms) — ${retrievalOutput.candidates.length} candidates`, {
titles: retrievalOutput.candidates.map((c) => c.title),
});
@@ -81,7 +84,7 @@ export async function runPipeline(
log(rec.id, 'Ranking: start');
sseWrite({ stage: 'ranking', status: 'start' });
const t2 = Date.now();
const rankingOutput = await runRanking(interpreterOutput, retrievalOutput);
const rankingOutput = await runRanking(interpreterOutput, retrievalOutput, mediaType);
log(rec.id, `Ranking: done (${Date.now() - t2}ms)`, {
definitely_like: rankingOutput.definitely_like.length,
might_like: rankingOutput.might_like.length,
@@ -95,15 +98,15 @@ export async function runPipeline(
log(rec.id, 'Curator: start');
sseWrite({ stage: 'curator', status: 'start' });
const t3 = Date.now();
const curatorOutput = await runCurator(rankingOutput, interpreterOutput);
log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} shows curated`);
const curatorOutput = await runCurator(rankingOutput, interpreterOutput, mediaType, useWebSearch);
log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} items curated`);
sseWrite({ stage: 'curator', status: 'done', data: curatorOutput });
// Generate AI title
let aiTitle: string = rec.title;
try {
log(rec.id, 'Title generation: start');
aiTitle = await generateTitle(interpreterOutput);
aiTitle = await generateTitle(interpreterOutput, mediaType);
log(rec.id, `Title generation: done — "${aiTitle}"`);
} catch (err) {
log(rec.id, `Title generation failed, keeping initial title: ${String(err)}`);

View File

@@ -1,13 +1,12 @@
import type { FastifyInstance } from 'fastify';
import { eq } from 'drizzle-orm';
import { db } from '../db.js';
import { feedback } from '../db/schema.js';
export default async function feedbackRoute(fastify: FastifyInstance) {
// POST /feedback — upsert by tv_show_name
// POST /feedback — upsert by item_name
fastify.post('/feedback', async (request, reply) => {
const body = request.body as {
tv_show_name: string;
item_name: string;
stars: number;
feedback?: string;
};
@@ -15,12 +14,12 @@ export default async function feedbackRoute(fastify: FastifyInstance) {
await db
.insert(feedback)
.values({
tv_show_name: body.tv_show_name,
item_name: body.item_name,
stars: body.stars,
feedback: body.feedback ?? '',
})
.onConflictDoUpdate({
target: feedback.tv_show_name,
target: feedback.item_name,
set: {
stars: body.stars,
feedback: body.feedback ?? '',

View File

@@ -3,7 +3,7 @@ import { eq, desc } from 'drizzle-orm';
import { db } from '../db.js';
import { recommendations, feedback } from '../db/schema.js';
import { runPipeline } from '../pipelines/recommendation.js';
import type { SSEEvent } from '../types/agents.js';
import type { MediaType, SSEEvent } from '../types/agents.js';
export default async function recommendationsRoute(fastify: FastifyInstance) {
// POST /recommendations — create record, return { id }
@@ -14,6 +14,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
disliked_shows?: string;
themes?: string;
brainstorm_count?: number;
media_type?: string;
use_web_search?: boolean;
};
const title = (body.main_prompt ?? '')
@@ -24,6 +26,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
const rawCount = Number(body.brainstorm_count ?? 100);
const brainstorm_count = Number.isFinite(rawCount) ? Math.min(200, Math.max(50, rawCount)) : 100;
const media_type: MediaType = body.media_type === 'movie' ? 'movie' : 'tv_show';
const use_web_search = body.use_web_search === true;
const [rec] = await db
.insert(recommendations)
@@ -34,6 +38,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
disliked_shows: body.disliked_shows ?? '',
themes: body.themes ?? '',
brainstorm_count,
media_type,
use_web_search,
status: 'pending',
})
.returning({ id: recommendations.id });
@@ -48,6 +54,7 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
id: recommendations.id,
title: recommendations.title,
status: recommendations.status,
media_type: recommendations.media_type,
created_at: recommendations.created_at,
})
.from(recommendations)
@@ -68,7 +75,6 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
});
// GET /recommendations/:id/stream — SSE pipeline stream
// Always fetches all current feedback and injects if present (supports rerank flow)
fastify.get('/recommendations/:id/stream', async (request, reply) => {
const { id } = request.params as { id: string };
const [rec] = await db
@@ -80,12 +86,13 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
// Load all feedback to potentially inject as context
const feedbackRows = await db.select().from(feedback);
const mediaLabel = rec.media_type === 'movie' ? 'Movie' : 'Show';
const feedbackContext =
feedbackRows.length > 0
? feedbackRows
.map(
(f) =>
`Show: "${f.tv_show_name}" — Rating: ${f.stars}/3 stars${f.feedback ? ` — Comment: ${f.feedback}` : ''}`,
`${mediaLabel}: "${f.item_name}" — Rating: ${f.stars}/3 stars${f.feedback ? ` — Comment: ${f.feedback}` : ''}`,
)
.join('\n')
: undefined;

View File

@@ -1,3 +1,5 @@
/**
 * Kind of media a recommendation run targets.
 *
 * - `'tv_show'`: TV series
 * - `'movie'`: films
 */
export type MediaType =
  | 'tv_show'
  | 'movie';
export interface InterpreterOutput {
liked: string[];
disliked: string[];