// recommender/packages/backend/src/agents/ranking.ts

import { openai, defaultModel, serviceOptions } from '../agent.js';
import type { InterpreterOutput, RetrievalOutput, RankingOutput, MediaType } from '../types/agents.js';
import { z } from 'zod';
import { zodTextFormat } from 'openai/helpers/zod';

/** Builds a fresh "array of title strings" schema; called once per tag so no schema instance is shared. */
const titleList = () => z.array(z.string());

/**
 * Shape of one structured ranking response: five confidence tags, each
 * holding a list of title strings. Passed to `zodTextFormat` when requesting
 * a ranking from the model.
 */
const RankingSchema = z.object({
  full_match: titleList(),
  definitely_like: titleList(),
  might_like: titleList(),
  questionable: titleList(),
  will_not_like: titleList(),
});

/**
 * Normalizes avoidance terms for substring matching: trims, lower-cases, and
 * drops blank entries. A blank term must be dropped because
 * `String.prototype.includes('')` is always true, which would otherwise
 * filter out every candidate.
 */
function normalizeAvoidTerms(avoid: readonly string[]): string[] {
  return avoid
    .map((term) => term.trim().toLowerCase())
    .filter((term) => term.length > 0);
}

/** Splits `items` into consecutive groups of at most `size` elements, preserving order. */
function splitIntoChunks<T>(items: readonly T[], size: number): T[][] {
  const groups: T[][] = [];
  for (let start = 0; start < items.length; start += size) {
    groups.push(items.slice(start, start + size));
  }
  return groups;
}

/**
 * Sorts retrieved candidates into five confidence tags by asking the model to
 * rank them against the interpreted user preferences.
 *
 * Works in two phases:
 *  1. Pre-filter: drops any candidate whose title or reason contains one of
 *     the avoidance terms (case-insensitive substring match).
 *  2. Chunked ranking: sends the survivors to the model in groups of 15, one
 *     request at a time, and concatenates the per-chunk results in order.
 *
 * @param interpreter - Interpreted user preferences (liked titles, themes,
 *   tone, avoidances, requirements, ...).
 * @param retrieval - Retrieval output whose `candidates` are ranked.
 * @param mediaType - Selects the wording used in the prompt; anything other
 *   than `'movie'` is described as a "TV show".
 * @param hardRequirements - When true, appends an instruction telling the
 *   model to put every candidate that misses a stated requirement into
 *   `will_not_like`.
 * @returns The merged tags across all chunks. Titles are passed through as
 *   returned by the model; they are not checked against the input candidates.
 *   If nothing survives the pre-filter, all tags are empty and no request is
 *   made.
 * @throws Propagates any rejection from `openai.responses.parse`; tags
 *   collected from earlier chunks are discarded in that case.
 */
export async function runRanking(
  interpreter: InterpreterOutput,
  retrieval: RetrievalOutput,
  mediaType: MediaType = 'tv_show',
  hardRequirements = false,
): Promise<RankingOutput> {
  const mediaLabel = mediaType === 'movie' ? 'movie' : 'TV show';

  // Phase 1: Pre-filter — remove avoidance violations.
  // NOTE(review): this is a plain substring match, so a short term such as
  // "war" also matches "award"; confirm whether word-boundary matching is wanted.
  const avoidTerms = normalizeAvoidTerms(interpreter.avoid);
  const filtered = retrieval.candidates.filter((c) => {
    const text = (c.title + ' ' + c.reason).toLowerCase();
    return !avoidTerms.some((term) => text.includes(term));
  });

  // Phase 2: Chunked ranking — split into groups of ~15
  const CHUNK_SIZE = 15;
  const chunks = splitIntoChunks(filtered, CHUNK_SIZE);

  const allTags: RankingOutput = {
    full_match: [],
    definitely_like: [],
    might_like: [],
    questionable: [],
    will_not_like: [],
  };

  // The system instructions and the preference summary do not depend on the
  // chunk, so they are built once rather than on every iteration.
  const instructions = `You are a ${mediaLabel} ranking critic. Assign each ${mediaLabel} to exactly one of five confidence tags based on how well it matches the user's preferences.

Tags:
- "full_match": 100% match — perfectly satisfies every stated preference, requirement, and avoidance criteria with no compromises
- "definitely_like": Near-perfect match to all preferences with only minor caveats
- "might_like": Strong match to most preferences
- "questionable": Partial alignment, some aspects don't match
- "will_not_like": Likely mismatch, conflicts with preferences or avoidance criteria

Every ${mediaLabel} in the input must appear in exactly one tag. Use the title exactly as given.${hardRequirements ? '\n\nHARD REQUIREMENTS MODE: Any candidate that does not satisfy every stated requirement must be placed in "will_not_like", regardless of other qualities.' : ''}`;

  const preferenceSummary = `User preferences:
Liked ${mediaLabel}s: ${interpreter.liked.join(', ') || '(none)'}
Themes: ${interpreter.themes.join(', ') || '(none)'}
Character preferences: ${interpreter.character_preferences.join(', ') || '(none)'}
Tone: ${interpreter.tone.join(', ') || '(none)'}
Avoid: ${interpreter.avoid.join(', ') || '(none)'}
Requirements: ${interpreter.requirements.join(', ') || '(none)'}`;

  const tagNames = [
    'full_match',
    'definitely_like',
    'might_like',
    'questionable',
    'will_not_like',
  ] as const;

  // Requests are awaited one at a time, so results are merged in chunk order.
  for (const chunk of chunks) {
    const chunkTitles = chunk.map((c) => `- ${c.title}: ${c.reason}`).join('\n');

    const response = await openai.responses.parse({
      model: defaultModel,
      temperature: 0.2,
      // The Responses API names its output cap `max_output_tokens`;
      // `max_completion_tokens` belongs to the Chat Completions API.
      max_output_tokens: 16384,
      ...serviceOptions,
      text: { format: zodTextFormat(RankingSchema, "ranking") },
      instructions,
      input: `${preferenceSummary}

Rank these ${mediaLabel}s:
${chunkTitles}`,
    });

    // A missing parsed payload (or a missing tag) contributes nothing for this chunk.
    const chunkResult = (response.output_parsed as Partial<RankingOutput>) ?? {};

    for (const tag of tagNames) {
      allTags[tag].push(...(chunkResult[tag] ?? []));
    }
  }

  return allTags;
}