adding movies & web search tool

2026-03-26 20:35:22 -03:00
parent 6fdfc3797a
commit 1437092a42
25 changed files with 450 additions and 135 deletions
--- a/packages/backend/src/agents/curator.ts
+++ b/packages/backend/src/agents/curator.ts
@@ -1,5 +1,5 @@
 import { openai } from '../agent.js';
-import type { InterpreterOutput, RankingOutput, CuratorOutput } from '../types/agents.js';
+import type { InterpreterOutput, RankingOutput, CuratorOutput, MediaType } from '../types/agents.js';
 import { z } from 'zod';
 import { zodTextFormat } from 'openai/helpers/zod';

@@ -14,7 +14,11 @@ const CuratorSchema = z.object({
 export async function runCurator(
  ranking: RankingOutput,
  interpreter: InterpreterOutput,
+  mediaType: MediaType = 'tv_show',
+  useWebSearch = false,
 ): Promise<CuratorOutput[]> {
+  const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
+
  const allShows = [
    ...ranking.definitely_like.map((t) => ({ title: t, category: 'Definitely Like' as const })),
    ...ranking.might_like.map((t) => ({ title: t, category: 'Might Like' as const })),
@@ -32,17 +36,15 @@ export async function runCurator(
    model: 'gpt-5.4',
    temperature: 0.5,
    service_tier: 'flex',
-    tools: [
-      { type: 'web_search' }
-    ],
+    ...(useWebSearch ? { tools: [{ type: 'web_search' as const }] } : {}),
    text: { format: zodTextFormat(CuratorSchema, "shows") },
-    instructions: `You are a TV show recommendation curator. For each show, write a concise 1-2 sentence explanation of why it was assigned to its category based on the user's preferences.
+    instructions: `You are a ${mediaLabel} recommendation curator. For each ${mediaLabel}, write a concise 1-2 sentence explanation of why it was assigned to its category based on the user's preferences.${useWebSearch ? '\n\nUse web search to verify details and enrich explanations with accurate information.' : ''}

 Rules:
 - Preserve the exact title and category as given
 - Keep explanations concise (1-2 sentences max)
 - Reference specific user preferences in the explanation
- Be honest — explain why "Questionable" or "Will Not Like" shows got that rating`,
+- Be honest — explain why "Questionable" or "Will Not Like" ${mediaLabel}s got that rating`,
    input: `User preferences summary:
 Liked: ${JSON.stringify(interpreter.liked)}
 Themes: ${JSON.stringify(interpreter.themes)}
@@ -50,7 +52,7 @@ Tone: ${JSON.stringify(interpreter.tone)}
 Character preferences: ${JSON.stringify(interpreter.character_preferences)}
 Avoid: ${JSON.stringify(interpreter.avoid)}

-Shows to describe:
+${mediaLabel}s to describe:
 ${showList}`,
  });

--- a/packages/backend/src/agents/interpreter.ts
+++ b/packages/backend/src/agents/interpreter.ts
@@ -1,5 +1,5 @@
 import { openai } from '../agent.js';
-import type { InterpreterOutput } from '../types/agents.js';
+import type { InterpreterOutput, MediaType } from '../types/agents.js';
 import { z } from 'zod';
 import { zodTextFormat } from 'openai/helpers/zod';

@@ -17,10 +17,12 @@ interface InterpreterInput {
  liked_shows: string;
  disliked_shows: string;
  themes: string;
+  media_type: MediaType;
  feedback_context?: string;
 }

 export async function runInterpreter(input: InterpreterInput): Promise<InterpreterOutput> {
+  const mediaLabel = input.media_type === 'movie' ? 'movie' : 'TV show';
  const feedbackSection = input.feedback_context
    ? `\n\nUser Feedback Context (incorporate into preferences):\n${input.feedback_context}`
    : '';
@@ -30,7 +32,7 @@ export async function runInterpreter(input: InterpreterInput): Promise<Interpret
    temperature: 0.2,
    service_tier: 'flex',
    text: { format: zodTextFormat(InterpreterSchema, "preferences") },
-    instructions: `You are a TV show preference interpreter. Transform raw user input into structured, normalized preferences.
+    instructions: `You are a ${mediaLabel} preference interpreter. Transform raw user input into structured, normalized preferences.

 Rules:
 - Extract implicit preferences from the main prompt
@@ -39,8 +41,8 @@ Rules:
 - Do NOT assume anything not stated or clearly implied
 - Be specific and concrete, not vague`,
    input: `Main prompt: ${input.main_prompt}
-Liked shows: ${input.liked_shows || '(none)'}
-Disliked shows: ${input.disliked_shows || '(none)'}
+Liked ${mediaLabel}s: ${input.liked_shows || '(none)'}
+Disliked ${mediaLabel}s: ${input.disliked_shows || '(none)'}
 Themes and requirements: ${input.themes || '(none)'}${feedbackSection}`,
  });

--- a/packages/backend/src/agents/ranking.ts
+++ b/packages/backend/src/agents/ranking.ts
@@ -1,5 +1,5 @@
 import { openai } from '../agent.js';
-import type { InterpreterOutput, RetrievalOutput, RankingOutput } from '../types/agents.js';
+import type { InterpreterOutput, RetrievalOutput, RankingOutput, MediaType } from '../types/agents.js';
 import { z } from 'zod';
 import { zodTextFormat } from 'openai/helpers/zod';

@@ -13,7 +13,10 @@ const RankingSchema = z.object({
 export async function runRanking(
  interpreter: InterpreterOutput,
  retrieval: RetrievalOutput,
+  mediaType: MediaType = 'tv_show',
 ): Promise<RankingOutput> {
+  const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
+
  // Phase 1: Pre-filter — remove avoidance violations
  const avoidList = interpreter.avoid.map((a) => a.toLowerCase());
  const filtered = retrieval.candidates.filter((c) => {
@@ -43,7 +46,7 @@ export async function runRanking(
      temperature: 0.2,
      service_tier: 'flex',
      text: { format: zodTextFormat(RankingSchema, "ranking") },
-      instructions: `You are a TV show ranking critic. Assign each show to exactly one of four confidence buckets based on how well it matches the user's preferences.
+      instructions: `You are a ${mediaLabel} ranking critic. Assign each ${mediaLabel} to exactly one of four confidence buckets based on how well it matches the user's preferences.

 Buckets:
 - "definitely_like": Near-perfect match to all preferences
@@ -51,15 +54,15 @@ Buckets:
 - "questionable": Partial alignment, some aspects don't match
 - "will_not_like": Likely mismatch, conflicts with preferences or avoidance criteria

-Every show in the input must appear in exactly one bucket. Use the title exactly as given.`,
+Every ${mediaLabel} in the input must appear in exactly one bucket. Use the title exactly as given.`,
      input: `User preferences:
-Liked shows: ${JSON.stringify(interpreter.liked)}
+Liked ${mediaLabel}s: ${JSON.stringify(interpreter.liked)}
 Themes: ${JSON.stringify(interpreter.themes)}
 Character preferences: ${JSON.stringify(interpreter.character_preferences)}
 Tone: ${JSON.stringify(interpreter.tone)}
 Avoid: ${JSON.stringify(interpreter.avoid)}

-Rank these shows:
+Rank these ${mediaLabel}s:
 ${chunkTitles}`,
    });

--- a/packages/backend/src/agents/retrieval.ts
+++ b/packages/backend/src/agents/retrieval.ts
@@ -1,5 +1,5 @@
 import { openai } from '../agent.js';
-import type { InterpreterOutput, RetrievalOutput } from '../types/agents.js';
+import type { InterpreterOutput, RetrievalOutput, MediaType } from '../types/agents.js';
 import { z } from 'zod';
 import { zodTextFormat } from 'openai/helpers/zod';

@@ -10,33 +10,39 @@ const RetrievalSchema = z.object({
  }))
 });

-export async function runRetrieval(input: InterpreterOutput, brainstormCount = 100): Promise<RetrievalOutput> {
+export async function runRetrieval(
+  input: InterpreterOutput,
+  brainstormCount = 100,
+  mediaType: MediaType = 'tv_show',
+  useWebSearch = false,
+): Promise<RetrievalOutput> {
+  const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';
+  const mediaLabelPlural = mediaType === 'movie' ? 'movies' : 'TV shows';
+
  const response = await openai.responses.parse({
    model: 'gpt-5.4',
    temperature: 0.9,
    service_tier: 'flex',
-    tools: [
-      { type: 'web_search' }
-    ],
+    ...(useWebSearch ? { tools: [{ type: 'web_search' as const }] } : {}),
    text: { format: zodTextFormat(RetrievalSchema, "candidates") },
-    instructions: `You are a TV show candidate generator. Your goal is to brainstorm a LARGE, DIVERSE pool of ${brainstormCount} TV show candidates that match the user's structured preferences.
+    instructions: `You are a ${mediaLabel} candidate generator. Your goal is to brainstorm a LARGE, DIVERSE pool of ${brainstormCount} ${mediaLabel} candidates that match the user's structured preferences.${useWebSearch ? '\n\nUse web search to find recent and accurate titles, including newer releases.' : ''}

 Rules:
- Include both well-known and obscure shows
+- Include both well-known and obscure ${mediaLabelPlural}
 - Prioritize RECALL over precision — it's better to include too many than too few
- Each "reason" should briefly explain why the show matches the preferences
+- Each "reason" should briefly explain why the ${mediaLabel} matches the preferences
 - Avoid duplicates
- Include shows from different decades, countries, and networks
+- Include ${mediaLabelPlural} from different decades, countries${mediaType === 'tv_show' ? ', and networks' : ', and directors'}
 - Aim for ${brainstormCount} candidates minimum`,
    input: `Structured preferences:
-Liked shows: ${JSON.stringify(input.liked)}
-Disliked shows: ${JSON.stringify(input.disliked)}
+Liked ${mediaLabelPlural}: ${JSON.stringify(input.liked)}
+Disliked ${mediaLabelPlural}: ${JSON.stringify(input.disliked)}
 Themes: ${JSON.stringify(input.themes)}
 Character preferences: ${JSON.stringify(input.character_preferences)}
 Tone: ${JSON.stringify(input.tone)}
 Avoid: ${JSON.stringify(input.avoid)}

-Generate a large, diverse pool of TV show candidates.`,
+Generate a large, diverse pool of ${mediaLabel} candidates.`,
  });

  return (response.output_parsed as RetrievalOutput) ?? { candidates: [] };
--- a/packages/backend/src/agents/titleGenerator.ts
+++ b/packages/backend/src/agents/titleGenerator.ts
@@ -1,12 +1,14 @@
 import { openai } from '../agent.js';
-import type { InterpreterOutput } from '../types/agents.js';
+import type { InterpreterOutput, MediaType } from '../types/agents.js';
+
+export async function generateTitle(interpreter: InterpreterOutput, mediaType: MediaType = 'tv_show'): Promise<string> {
+  const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';

-export async function generateTitle(interpreter: InterpreterOutput): Promise<string> {
  const response = await openai.responses.create({
    model: 'gpt-5.4-mini',
    temperature: 0.7,
    service_tier: 'flex',
-    instructions: `Generate a concise 5-8 word title for a TV show recommendation session.
+    instructions: `Generate a concise 5-8 word title for a ${mediaLabel} recommendation session.
 Capture the essence of the user's taste — genre, tone, key themes.
 Respond with ONLY the title. No quotes, no trailing punctuation.
 Examples: "Dark Crime Dramas With Moral Ambiguity", "Cozy British Mysteries With Quirky Detectives"`,
--- a/packages/backend/src/db/schema.ts
+++ b/packages/backend/src/db/schema.ts
@@ -1,4 +1,4 @@
-import { pgTable, uuid, text, jsonb, timestamp, integer, uniqueIndex } from 'drizzle-orm/pg-core';
+import { pgTable, uuid, text, jsonb, timestamp, integer, uniqueIndex, boolean } from 'drizzle-orm/pg-core';
 import type { CuratorOutput } from '../types/agents.js';

 export const recommendations = pgTable('recommendations', {
@@ -9,6 +9,8 @@ export const recommendations = pgTable('recommendations', {
  disliked_shows: text('disliked_shows').notNull().default(''),
  themes: text('themes').notNull().default(''),
  brainstorm_count: integer('brainstorm_count').notNull().default(100),
+  media_type: text('media_type').notNull().default('tv_show'),
+  use_web_search: boolean('use_web_search').notNull().default(false),
  recommendations: jsonb('recommendations').$type<CuratorOutput[]>(),
  status: text('status').notNull().default('pending'),
  created_at: timestamp('created_at').defaultNow().notNull(),
@@ -18,10 +20,10 @@ export const feedback = pgTable(
  'feedback',
  {
    id: uuid('id').defaultRandom().primaryKey(),
-    tv_show_name: text('tv_show_name').notNull(),
+    item_name: text('item_name').notNull(),
    stars: integer('stars').notNull(),
    feedback: text('feedback').notNull().default(''),
    created_at: timestamp('created_at').defaultNow().notNull(),
  },
-  (table) => [uniqueIndex('feedback_tv_show_name_idx').on(table.tv_show_name)],
+  (table) => [uniqueIndex('feedback_item_name_idx').on(table.item_name)],
 );
--- a/packages/backend/src/migrate.ts
+++ b/packages/backend/src/migrate.ts
@@ -0,0 +1,44 @@
+import { drizzle } from 'drizzle-orm/postgres-js';
+import { migrate } from 'drizzle-orm/postgres-js/migrator';
+import postgres from 'postgres';
+import * as dotenv from 'dotenv';
+
+// Load environment variables from .env.local and .env before reading DATABASE_URL.
+dotenv.config({ path: ['.env.local', '.env'] });
+
+const connectionString = process.env.DATABASE_URL;
+
+if (!connectionString) {
+  console.error('DATABASE_URL is not set');
+  process.exit(1);
+}
+
+// Using max: 1 connection since it's only for migration
+const migrationClient = postgres(connectionString, { max: 1 });
+const db = drizzle(migrationClient);
+
+/**
+ * Applies the migrations found in `./drizzle` and then closes the client.
+ *
+ * On failure the error is logged and `process.exitCode` is set to 1 instead of
+ * calling `process.exit(1)`: an immediate exit inside `catch` would terminate
+ * the process before the `finally` block runs, so the connection would never
+ * be closed cleanly.
+ */
+const runMigrations = async (): Promise<void> => {
+  console.log('Running database migrations...');
+  try {
+    await migrate(db, { migrationsFolder: './drizzle' });
+    console.log('Migrations completed successfully.');
+  } catch (err) {
+    console.error('Error running migrations:', err);
+    process.exitCode = 1;
+  } finally {
+    await migrationClient.end();
+  }
+};
+
+runMigrations().catch((err: unknown) => {
+  console.error('Migration script failed:', err);
+  process.exitCode = 1;
+});
--- a/packages/backend/src/pipelines/recommendation.ts
+++ b/packages/backend/src/pipelines/recommendation.ts
@@ -5,14 +5,14 @@ import { runInterpreter } from '../agents/interpreter.js';
 import { runRetrieval } from '../agents/retrieval.js';
 import { runRanking } from '../agents/ranking.js';
 import { runCurator } from '../agents/curator.js';
-import type { CuratorOutput, SSEEvent } from '../types/agents.js';
+import type { CuratorOutput, MediaType, SSEEvent } from '../types/agents.js';
 import { generateTitle } from '../agents/titleGenerator.js';

 /* -- Agent pipeline --
 [1] Interpreter -> gets user input, transforms into structured data
-[2] Retrieval -> gets shows from OpenAI (high temperature)
-[3] Ranking -> ranks shows based on user input
-[4] Curator -> curates shows based on user input
+[2] Retrieval -> gets candidates from OpenAI (high temperature)
+[3] Ranking -> ranks candidates based on user input
+[4] Curator -> curates candidates based on user input
 */

 type RecommendationRecord = typeof recommendations.$inferSelect;
@@ -33,8 +33,10 @@ export async function runPipeline(
 ): Promise<CuratorOutput[]> {
  let currentStage: SSEEvent['stage'] = 'interpreter';
  const startTime = Date.now();
+  const mediaType = (rec.media_type ?? 'tv_show') as MediaType;
+  const useWebSearch = rec.use_web_search ?? false;

-  log(rec.id, `Starting pipeline for "${rec.title}"${feedbackContext ? ' (with feedback context)' : ''}`);
+  log(rec.id, `Starting pipeline for "${rec.title}" [${mediaType}${useWebSearch ? ', web_search' : ''}]${feedbackContext ? ' (with feedback context)' : ''}`);

  try {
    // Set status to running
@@ -54,6 +56,7 @@ export async function runPipeline(
      liked_shows: rec.liked_shows,
      disliked_shows: rec.disliked_shows,
      themes: rec.themes,
+      media_type: mediaType,
      ...(feedbackContext !== undefined ? { feedback_context: feedbackContext } : {}),
    });
    log(rec.id, `Interpreter: done (${Date.now() - t0}ms)`, {
@@ -70,7 +73,7 @@ export async function runPipeline(
    log(rec.id, 'Retrieval: start');
    sseWrite({ stage: 'retrieval', status: 'start' });
    const t1 = Date.now();
-    const retrievalOutput = await runRetrieval(interpreterOutput, rec.brainstorm_count);
+    const retrievalOutput = await runRetrieval(interpreterOutput, rec.brainstorm_count, mediaType, useWebSearch);
    log(rec.id, `Retrieval: done (${Date.now() - t1}ms) — ${retrievalOutput.candidates.length} candidates`, {
      titles: retrievalOutput.candidates.map((c) => c.title),
    });
@@ -81,7 +84,7 @@ export async function runPipeline(
    log(rec.id, 'Ranking: start');
    sseWrite({ stage: 'ranking', status: 'start' });
    const t2 = Date.now();
-    const rankingOutput = await runRanking(interpreterOutput, retrievalOutput);
+    const rankingOutput = await runRanking(interpreterOutput, retrievalOutput, mediaType);
    log(rec.id, `Ranking: done (${Date.now() - t2}ms)`, {
      definitely_like: rankingOutput.definitely_like.length,
      might_like: rankingOutput.might_like.length,
@@ -95,15 +98,15 @@ export async function runPipeline(
    log(rec.id, 'Curator: start');
    sseWrite({ stage: 'curator', status: 'start' });
    const t3 = Date.now();
-    const curatorOutput = await runCurator(rankingOutput, interpreterOutput);
-    log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} shows curated`);
+    const curatorOutput = await runCurator(rankingOutput, interpreterOutput, mediaType, useWebSearch);
+    log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} items curated`);
    sseWrite({ stage: 'curator', status: 'done', data: curatorOutput });

    // Generate AI title
    let aiTitle: string = rec.title;
    try {
      log(rec.id, 'Title generation: start');
-      aiTitle = await generateTitle(interpreterOutput);
+      aiTitle = await generateTitle(interpreterOutput, mediaType);
      log(rec.id, `Title generation: done — "${aiTitle}"`);
    } catch (err) {
      log(rec.id, `Title generation failed, keeping initial title: ${String(err)}`);
--- a/packages/backend/src/routes/feedback.ts
+++ b/packages/backend/src/routes/feedback.ts
@@ -1,13 +1,12 @@
 import type { FastifyInstance } from 'fastify';
-import { eq } from 'drizzle-orm';
 import { db } from '../db.js';
 import { feedback } from '../db/schema.js';

 export default async function feedbackRoute(fastify: FastifyInstance) {
-  // POST /feedback — upsert by tv_show_name
+  // POST /feedback — upsert by item_name
  fastify.post('/feedback', async (request, reply) => {
    const body = request.body as {
-      tv_show_name: string;
+      item_name: string;
      stars: number;
      feedback?: string;
    };
@@ -15,12 +14,12 @@ export default async function feedbackRoute(fastify: FastifyInstance) {
    await db
      .insert(feedback)
      .values({
-        tv_show_name: body.tv_show_name,
+        item_name: body.item_name,
        stars: body.stars,
        feedback: body.feedback ?? '',
      })
      .onConflictDoUpdate({
-        target: feedback.tv_show_name,
+        target: feedback.item_name,
        set: {
          stars: body.stars,
          feedback: body.feedback ?? '',
--- a/packages/backend/src/routes/recommendations.ts
+++ b/packages/backend/src/routes/recommendations.ts
@@ -3,7 +3,7 @@ import { eq, desc } from 'drizzle-orm';
 import { db } from '../db.js';
 import { recommendations, feedback } from '../db/schema.js';
 import { runPipeline } from '../pipelines/recommendation.js';
-import type { SSEEvent } from '../types/agents.js';
+import type { MediaType, SSEEvent } from '../types/agents.js';

 export default async function recommendationsRoute(fastify: FastifyInstance) {
  // POST /recommendations — create record, return { id }
@@ -14,6 +14,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
      disliked_shows?: string;
      themes?: string;
      brainstorm_count?: number;
+      media_type?: string;
+      use_web_search?: boolean;
    };

    const title = (body.main_prompt ?? '')
@@ -24,6 +26,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {

    const rawCount = Number(body.brainstorm_count ?? 100);
    const brainstorm_count = Number.isFinite(rawCount) ? Math.min(200, Math.max(50, rawCount)) : 100;
+    const media_type: MediaType = body.media_type === 'movie' ? 'movie' : 'tv_show';
+    const use_web_search = body.use_web_search === true;

    const [rec] = await db
      .insert(recommendations)
@@ -34,6 +38,8 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
        disliked_shows: body.disliked_shows ?? '',
        themes: body.themes ?? '',
        brainstorm_count,
+        media_type,
+        use_web_search,
        status: 'pending',
      })
      .returning({ id: recommendations.id });
@@ -48,6 +54,7 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
        id: recommendations.id,
        title: recommendations.title,
        status: recommendations.status,
+        media_type: recommendations.media_type,
        created_at: recommendations.created_at,
      })
      .from(recommendations)
@@ -68,7 +75,6 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {
  });

  // GET /recommendations/:id/stream — SSE pipeline stream
-  // Always fetches all current feedback and injects if present (supports rerank flow)
  fastify.get('/recommendations/:id/stream', async (request, reply) => {
    const { id } = request.params as { id: string };
    const [rec] = await db
@@ -80,12 +86,13 @@ export default async function recommendationsRoute(fastify: FastifyInstance) {

    // Load all feedback to potentially inject as context
    const feedbackRows = await db.select().from(feedback);
+    const mediaLabel = rec.media_type === 'movie' ? 'Movie' : 'Show';
    const feedbackContext =
      feedbackRows.length > 0
        ? feedbackRows
            .map(
              (f) =>
-                `Show: "${f.tv_show_name}" — Rating: ${f.stars}/3 stars${f.feedback ? ` — Comment: ${f.feedback}` : ''}`,
+                `${mediaLabel}: "${f.item_name}" — Rating: ${f.stars}/3 stars${f.feedback ? ` — Comment: ${f.feedback}` : ''}`,
            )
            .join('\n')
        : undefined;
--- a/packages/backend/src/types/agents.ts
+++ b/packages/backend/src/types/agents.ts
@@ -1,3 +1,5 @@
+export type MediaType = 'tv_show' | 'movie';
+
 export interface InterpreterOutput {
  liked: string[];
  disliked: string[];