initial commit
Some checks failed
Recommender Build and Deploy (internal) / Build Recommender Image (push) Failing after 3m48s
Recommender Build and Deploy (internal) / Deploy Recommender (internal) (push) Has been skipped

This commit is contained in:
2026-03-25 17:34:37 -03:00
commit f9c7582e4d
52 changed files with 7022 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
import OpenAI from 'openai';
import * as dotenv from 'dotenv';
// Load environment variables before constructing the client so
// OPENAI_API_KEY is available at construction time.
dotenv.config();
// Shared OpenAI client reused by every agent in this service.
// NOTE(review): if OPENAI_API_KEY is unset this is undefined and the SDK
// will error — confirm all deployments provide the key.
export const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Sends a single-turn prompt to the chat completions API and returns the
 * assistant's reply text.
 *
 * @param prompt - Raw user prompt, forwarded as one user message.
 * @returns The assistant's message content, or a fixed fallback string when
 *   the request fails or the response carries no message.
 */
export async function askAgent(prompt: string) {
  try {
    const response = await openai.chat.completions.create({
      model: 'gpt-5.4',
      service_tier: 'flex',
      messages: [{ role: 'user', content: prompt }],
    });
    // Fix: optional chaining instead of a non-null assertion chain
    // (`response!.choices![0]!...`) — an empty `choices` array no longer
    // throws a TypeError; we fall back to the same string the catch returns.
    return response.choices[0]?.message?.content ?? 'Agent is in dummy mode or encountered an error.';
  } catch (err) {
    console.error('Agent endpoint dummy error:', err instanceof Error ? err.message : err);
    return 'Agent is in dummy mode or encountered an error.';
  }
}

View File

@@ -0,0 +1,66 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RankingOutput, CuratorOutput } from '../types/agents.js';
/**
 * Curator agent: writes a short, user-facing explanation for every ranked
 * show while preserving the exact title and bucket chosen by the ranking
 * stage.
 *
 * @param ranking - The four bucket lists from the ranking stage.
 * @param interpreter - Structured preferences used to justify each bucket.
 * @returns One CuratorOutput per show; [] when there is nothing to curate
 *   (the API call is skipped entirely in that case).
 */
export async function runCurator(
ranking: RankingOutput,
interpreter: InterpreterOutput,
): Promise<CuratorOutput[]> {
// Flatten the four buckets into one list, tagging every title with its
// human-readable category label (`as const` keeps the literal types).
const allShows = [
...ranking.definitely_like.map((t) => ({ title: t, category: 'Definitely Like' as const })),
...ranking.might_like.map((t) => ({ title: t, category: 'Might Like' as const })),
...ranking.questionable.map((t) => ({ title: t, category: 'Questionable' as const })),
...ranking.will_not_like.map((t) => ({ title: t, category: 'Will Not Like' as const })),
];
// Nothing survived ranking — avoid a pointless (and costly) model call.
if (allShows.length === 0) return [];
const showList = allShows
.map((s) => `- "${s.title}" (${s.category})`)
.join('\n');
// JSON mode (`response_format: json_object`) forces a parseable reply.
const response = await openai.chat.completions.create({
model: 'gpt-5.4-mini',
temperature: 0.5,
service_tier: 'flex',
response_format: { type: 'json_object' },
messages: [
{
role: 'system',
content: `You are a TV show recommendation curator. For each show, write a concise 1-2 sentence explanation of why it was assigned to its category based on the user's preferences.
Your output MUST be valid JSON:
{
"shows": [
{
"title": string,
"explanation": string,
"category": "Definitely Like" | "Might Like" | "Questionable" | "Will Not Like"
}
]
}
Rules:
- Preserve the exact title and category as given
- Keep explanations concise (1-2 sentences max)
- Reference specific user preferences in the explanation
- Be honest — explain why "Questionable" or "Will Not Like" shows got that rating`,
},
{
role: 'user',
content: `User preferences summary:
Liked: ${JSON.stringify(interpreter.liked)}
Themes: ${JSON.stringify(interpreter.themes)}
Tone: ${JSON.stringify(interpreter.tone)}
Character preferences: ${JSON.stringify(interpreter.character_preferences)}
Avoid: ${JSON.stringify(interpreter.avoid)}
Shows to describe:
${showList}`,
},
],
});
// NOTE(review): the model reply is cast without schema validation — valid
// JSON with the wrong shape would propagate silently; consider zod here.
const content = response.choices[0]?.message?.content ?? '{"shows":[]}';
const result = JSON.parse(content) as { shows: CuratorOutput[] };
return result.shows ?? [];
}

View File

@@ -0,0 +1,56 @@
import { openai } from '../agent.js';
import type { InterpreterOutput } from '../types/agents.js';
// Raw, unprocessed user input for the interpreter stage. The free-text
// fields come straight from the stored recommendation record;
// feedback_context is an optional digest of prior star-rating feedback.
interface InterpreterInput {
main_prompt: string;
liked_shows: string;
disliked_shows: string;
themes: string;
feedback_context?: string;
}
/**
 * Interpreter agent: converts free-text user input into the structured
 * InterpreterOutput profile consumed by the rest of the pipeline.
 *
 * Low temperature (0.2) keeps extraction stable; JSON mode forces a
 * parseable reply.
 *
 * @param input - Raw prompt, liked/disliked shows, themes, optional feedback.
 * @returns Parsed preference profile.
 */
export async function runInterpreter(input: InterpreterInput): Promise<InterpreterOutput> {
// Optional feedback digest is appended verbatim to the user message.
const feedbackSection = input.feedback_context
? `\n\nUser Feedback Context (incorporate into preferences):\n${input.feedback_context}`
: '';
const response = await openai.chat.completions.create({
model: 'gpt-5.4-mini',
temperature: 0.2,
service_tier: 'flex',
response_format: { type: 'json_object' },
messages: [
{
role: 'system',
content: `You are a TV show preference interpreter. Transform raw user input into structured, normalized preferences.
Your output MUST be valid JSON matching this schema:
{
"liked": string[], // shows the user likes
"disliked": string[], // shows the user dislikes
"themes": string[], // normalized themes (e.g. "spy" -> "espionage")
"character_preferences": string[], // character types they prefer
"tone": string[], // tone descriptors (e.g. "serious", "grounded", "dark")
"avoid": string[] // things to explicitly avoid
}
Rules:
- Extract implicit preferences from the main prompt
- Normalize terminology (e.g. "spy" → "espionage", "cop show" → "police procedural")
- Detect and resolve contradictions (prefer explicit over implicit)
- Do NOT assume anything not stated or clearly implied
- Be specific and concrete, not vague`,
},
{
role: 'user',
content: `Main prompt: ${input.main_prompt}
Liked shows: ${input.liked_shows || '(none)'}
Disliked shows: ${input.disliked_shows || '(none)'}
Themes and requirements: ${input.themes || '(none)'}${feedbackSection}`,
},
],
});
// NOTE(review): output is cast without schema validation; a reply with
// missing arrays would flow downstream unchecked — consider zod here.
const content = response.choices[0]?.message?.content ?? '{}';
return JSON.parse(content) as InterpreterOutput;
}

View File

@@ -0,0 +1,83 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RetrievalOutput, RankingOutput } from '../types/agents.js';
/**
 * Ranking agent: sorts retrieval candidates into four confidence buckets.
 *
 * Phase 1 drops candidates mentioning an explicit avoidance term (cheap,
 * local, no API call). Phase 2 sends the survivors to the model in chunks
 * of 15 and merges the per-chunk buckets.
 *
 * @param interpreter - Structured user preferences from the interpreter stage.
 * @param retrieval - Candidate pool from the retrieval stage.
 * @returns Merged bucket lists; all-empty buckets when nothing survives.
 */
export async function runRanking(
  interpreter: InterpreterOutput,
  retrieval: RetrievalOutput,
): Promise<RankingOutput> {
  // Phase 1: Pre-filter — remove avoidance violations.
  // Fix: match avoid terms on word boundaries with regex-escaped input.
  // The old lowercase substring check made avoid "war" drop "Award"-style
  // titles, and unescaped user terms could contain regex metacharacters.
  const avoidPatterns = interpreter.avoid.map(
    (term) =>
      new RegExp(`\\b${term.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`),
  );
  const filtered = retrieval.candidates.filter((c) => {
    const text = (c.title + ' ' + c.reason).toLowerCase();
    return !avoidPatterns.some((pattern) => pattern.test(text));
  });
  // Phase 2: Chunked ranking — split into groups of ~15 so each prompt stays
  // small enough for the model to return reliable JSON.
  const CHUNK_SIZE = 15;
  const chunks: (typeof filtered)[] = [];
  for (let i = 0; i < filtered.length; i += CHUNK_SIZE) {
    chunks.push(filtered.slice(i, i + CHUNK_SIZE));
  }
  const allBuckets: RankingOutput = {
    definitely_like: [],
    might_like: [],
    questionable: [],
    will_not_like: [],
  };
  // Chunks are ranked sequentially on purpose (keeps request rate low);
  // parallelizing would change the service's load profile.
  for (const chunk of chunks) {
    const chunkTitles = chunk.map((c) => `- ${c.title}: ${c.reason}`).join('\n');
    const response = await openai.chat.completions.create({
      model: 'gpt-5.4',
      temperature: 0.2,
      service_tier: 'flex',
      response_format: { type: 'json_object' },
      messages: [
        {
          role: 'system',
          content: `You are a TV show ranking critic. Assign each show to exactly one of four confidence buckets based on how well it matches the user's preferences.
Buckets:
- "definitely_like": Near-perfect match to all preferences
- "might_like": Strong match to most preferences
- "questionable": Partial alignment, some aspects don't match
- "will_not_like": Likely mismatch, conflicts with preferences or avoidance criteria
Your output MUST be valid JSON:
{
"definitely_like": string[],
"might_like": string[],
"questionable": string[],
"will_not_like": string[]
}
Every show in the input must appear in exactly one bucket. Use the title exactly as given.`,
        },
        {
          role: 'user',
          content: `User preferences:
Liked shows: ${JSON.stringify(interpreter.liked)}
Themes: ${JSON.stringify(interpreter.themes)}
Character preferences: ${JSON.stringify(interpreter.character_preferences)}
Tone: ${JSON.stringify(interpreter.tone)}
Avoid: ${JSON.stringify(interpreter.avoid)}
Rank these shows:
${chunkTitles}`,
        },
      ],
    });
    // Missing buckets in a chunk reply default to empty rather than crashing.
    const content = response.choices[0]?.message?.content ?? '{}';
    const chunkResult = JSON.parse(content) as Partial<RankingOutput>;
    allBuckets.definitely_like.push(...(chunkResult.definitely_like ?? []));
    allBuckets.might_like.push(...(chunkResult.might_like ?? []));
    allBuckets.questionable.push(...(chunkResult.questionable ?? []));
    allBuckets.will_not_like.push(...(chunkResult.will_not_like ?? []));
  }
  return allBuckets;
}

View File

@@ -0,0 +1,47 @@
import { openai } from '../agent.js';
import type { InterpreterOutput, RetrievalOutput } from '../types/agents.js';
/**
 * Retrieval agent: brainstorms a large, recall-oriented pool of candidate
 * TV shows matching the structured preferences.
 *
 * High temperature (0.9) is deliberate — diversity matters more than
 * precision here; the ranking stage filters the pool afterwards.
 *
 * @param input - Structured preferences from the interpreter stage.
 * @returns Candidate pool; [] candidates when the reply carries no message.
 */
export async function runRetrieval(input: InterpreterOutput): Promise<RetrievalOutput> {
  const response = await openai.chat.completions.create({
    model: 'gpt-5.4',
    temperature: 0.9,
    service_tier: 'flex',
    response_format: { type: 'json_object' },
    messages: [
      {
        role: 'system',
        // NOTE(review): the prompt previously asked for "6080" candidates —
        // almost certainly a garbled "60-80" range (6080 would be absurd and
        // the ranking stage chunks by 15); restored the intended range.
        content: `You are a TV show candidate generator. Your goal is to brainstorm a LARGE, DIVERSE pool of 60-80 TV show candidates that match the user's structured preferences.
Your output MUST be valid JSON matching this schema:
{
"candidates": [
{ "title": string, "reason": string }
]
}
Rules:
- Include both well-known and obscure shows
- Prioritize RECALL over precision — it's better to include too many than too few
- Each "reason" should briefly explain why the show matches the preferences
- Avoid duplicates
- Include shows from different decades, countries, and networks
- Aim for 60-80 candidates minimum`,
      },
      {
        role: 'user',
        content: `Structured preferences:
Liked shows: ${JSON.stringify(input.liked)}
Disliked shows: ${JSON.stringify(input.disliked)}
Themes: ${JSON.stringify(input.themes)}
Character preferences: ${JSON.stringify(input.character_preferences)}
Tone: ${JSON.stringify(input.tone)}
Avoid: ${JSON.stringify(input.avoid)}
Generate a large, diverse pool of TV show candidates.`,
      },
    ],
  });
  // Defensive default: a missing message becomes an empty candidate list.
  const content = response.choices[0]?.message?.content ?? '{"candidates":[]}';
  return JSON.parse(content) as RetrievalOutput;
}

View File

@@ -0,0 +1,9 @@
import { drizzle } from 'drizzle-orm/postgres-js';
import postgres from 'postgres';
import * as dotenv from 'dotenv';
// Load env vars before reading DATABASE_URL.
dotenv.config();
// NOTE(review): falls back to hard-coded dev credentials when DATABASE_URL
// is unset — acceptable for local dev, but confirm production always sets it.
const connectionString = process.env.DATABASE_URL || 'postgres://user:password@iris.haven:5432/recommender';
// Single shared postgres connection pool and drizzle handle for the service.
export const client = postgres(connectionString);
export const db = drizzle(client);

View File

@@ -0,0 +1,2 @@
// Barrel module: re-exports the live DB handles alongside the table schema
// so consumers need a single import path for database access.
export { db, client } from '../db.js';
export * as schema from './schema.js';

View File

@@ -0,0 +1,26 @@
import { pgTable, uuid, text, jsonb, timestamp, integer, uniqueIndex } from 'drizzle-orm/pg-core';
import type { CuratorOutput } from '../types/agents.js';
// One row per recommendation request; pipeline results are stored
// denormalized in the `recommendations` jsonb column.
export const recommendations = pgTable('recommendations', {
id: uuid('id').defaultRandom().primaryKey(),
title: text('title').notNull(),
main_prompt: text('main_prompt').notNull(),
liked_shows: text('liked_shows').notNull().default(''),
disliked_shows: text('disliked_shows').notNull().default(''),
themes: text('themes').notNull().default(''),
recommendations: jsonb('recommendations').$type<CuratorOutput[]>(),
// Lifecycle flag; rows start 'pending' (the pipeline elsewhere in this
// service moves it through 'running' to 'done' or 'error').
status: text('status').notNull().default('pending'),
created_at: timestamp('created_at').defaultNow().notNull(),
});
// Per-show user feedback. The unique index on tv_show_name is what makes
// POST /feedback a valid onConflictDoUpdate (upsert) target.
export const feedback = pgTable(
'feedback',
{
id: uuid('id').defaultRandom().primaryKey(),
tv_show_name: text('tv_show_name').notNull(),
// Star rating (the stream route renders it as "N/3 stars").
stars: integer('stars').notNull(),
feedback: text('feedback').notNull().default(''),
created_at: timestamp('created_at').defaultNow().notNull(),
},
(table) => [uniqueIndex('feedback_tv_show_name_idx').on(table.tv_show_name)],
);

View File

@@ -0,0 +1,36 @@
import Fastify from 'fastify';
import * as dotenv from 'dotenv';
import recommendationsRoute from './routes/recommendations.js';
import feedbackRoute from './routes/feedback.js';
// Load .env first, then .env.local.
// NOTE(review): with `override: true`, values in .env.local REPLACE any
// env vars already set on the process — so container-level env does NOT take
// precedence over .env.local, contrary to the original comment here.
// Confirm which precedence is actually intended before shipping.
dotenv.config();
dotenv.config({ path: '.env.local', override: true });
const fastify = Fastify({ logger: true });
// CORS — allow frontend dev server and production.
// NOTE(review): wildcard '*' origin on every response; tighten if the API
// ever carries credentials or cookies.
fastify.addHook('onRequest', async (_req, reply) => {
reply.header('Access-Control-Allow-Origin', '*');
reply.header('Access-Control-Allow-Methods', 'GET,POST,OPTIONS');
reply.header('Access-Control-Allow-Headers', 'Content-Type');
});
// Catch-all preflight handler; the CORS headers are added by the hook above.
fastify.options('*', async (_req, reply) => {
return reply.send();
});
// Body parsing is included in Fastify by default for JSON
await fastify.register(recommendationsRoute);
await fastify.register(feedbackRoute);
const port = Number(process.env['PORT'] ?? 3000);
try {
// 0.0.0.0 so the server is reachable from outside the container.
await fastify.listen({ port, host: '0.0.0.0' });
console.log(`Backend listening on http://localhost:${port}`);
} catch (err) {
fastify.log.error(err);
process.exit(1);
}

View File

@@ -0,0 +1,118 @@
import { eq } from 'drizzle-orm';
import { db } from '../db.js';
import { recommendations } from '../db/schema.js';
import { runInterpreter } from '../agents/interpreter.js';
import { runRetrieval } from '../agents/retrieval.js';
import { runRanking } from '../agents/ranking.js';
import { runCurator } from '../agents/curator.js';
import type { CuratorOutput, SSEEvent } from '../types/agents.js';
// Row type inferred from the drizzle `recommendations` table definition.
type RecommendationRecord = typeof recommendations.$inferSelect;
/** Timestamped console logger for pipeline progress, tagged with the recommendation id. */
function log(recId: string, msg: string, data?: unknown) {
  const prefix = `[pipeline] [${new Date().toISOString()}] [${recId}] ${msg}`;
  if (data === undefined) {
    console.log(prefix);
  } else {
    console.log(prefix, data);
  }
}
/**
 * Runs the four-stage recommendation pipeline (interpreter → retrieval →
 * ranking → curator) for one stored recommendation record, streaming
 * start/done events for each stage through `sseWrite` and persisting the
 * final result (and status transitions) to the database.
 *
 * @param rec - The recommendation row being processed.
 * @param sseWrite - Callback that pushes an SSEEvent to the client stream.
 * @param feedbackContext - Optional digest of prior user feedback, forwarded
 *   to the interpreter stage.
 * @returns Curated results; [] when any stage fails (the error is reported
 *   via SSE and the row's status is set to 'error' — never rethrown).
 */
export async function runPipeline(
rec: RecommendationRecord,
sseWrite: (event: SSEEvent) => void,
feedbackContext?: string,
): Promise<CuratorOutput[]> {
// Tracked so the catch block can report which stage failed.
let currentStage: SSEEvent['stage'] = 'interpreter';
const startTime = Date.now();
log(rec.id, `Starting pipeline for "${rec.title}"${feedbackContext ? ' (with feedback context)' : ''}`);
try {
// Set status to running
log(rec.id, 'Setting status → running');
await db
.update(recommendations)
.set({ status: 'running' })
.where(eq(recommendations.id, rec.id));
// --- Interpreter ---
currentStage = 'interpreter';
log(rec.id, 'Interpreter: start');
sseWrite({ stage: 'interpreter', status: 'start' });
const t0 = Date.now();
// Spread trick keeps `feedback_context` absent (not `undefined`) when there
// is no feedback — relevant under exactOptionalPropertyTypes.
const interpreterOutput = await runInterpreter({
main_prompt: rec.main_prompt,
liked_shows: rec.liked_shows,
disliked_shows: rec.disliked_shows,
themes: rec.themes,
...(feedbackContext !== undefined ? { feedback_context: feedbackContext } : {}),
});
log(rec.id, `Interpreter: done (${Date.now() - t0}ms)`, {
liked: interpreterOutput.liked,
disliked: interpreterOutput.disliked,
themes: interpreterOutput.themes,
tone: interpreterOutput.tone,
avoid: interpreterOutput.avoid,
});
sseWrite({ stage: 'interpreter', status: 'done', data: interpreterOutput });
// --- Retrieval ---
currentStage = 'retrieval';
log(rec.id, 'Retrieval: start');
sseWrite({ stage: 'retrieval', status: 'start' });
const t1 = Date.now();
const retrievalOutput = await runRetrieval(interpreterOutput);
log(rec.id, `Retrieval: done (${Date.now() - t1}ms) — ${retrievalOutput.candidates.length} candidates`, {
titles: retrievalOutput.candidates.map((c) => c.title),
});
sseWrite({ stage: 'retrieval', status: 'done', data: retrievalOutput });
// --- Ranking ---
currentStage = 'ranking';
log(rec.id, 'Ranking: start');
sseWrite({ stage: 'ranking', status: 'start' });
const t2 = Date.now();
const rankingOutput = await runRanking(interpreterOutput, retrievalOutput);
log(rec.id, `Ranking: done (${Date.now() - t2}ms)`, {
definitely_like: rankingOutput.definitely_like.length,
might_like: rankingOutput.might_like.length,
questionable: rankingOutput.questionable.length,
will_not_like: rankingOutput.will_not_like.length,
});
sseWrite({ stage: 'ranking', status: 'done', data: rankingOutput });
// --- Curator ---
currentStage = 'curator';
log(rec.id, 'Curator: start');
sseWrite({ stage: 'curator', status: 'start' });
const t3 = Date.now();
const curatorOutput = await runCurator(rankingOutput, interpreterOutput);
log(rec.id, `Curator: done (${Date.now() - t3}ms) — ${curatorOutput.length} shows curated`);
sseWrite({ stage: 'curator', status: 'done', data: curatorOutput });
// Save results to DB
log(rec.id, 'Saving results to DB');
await db
.update(recommendations)
.set({ recommendations: curatorOutput, status: 'done' })
.where(eq(recommendations.id, rec.id));
sseWrite({ stage: 'complete', status: 'done' });
log(rec.id, `Pipeline complete (total: ${Date.now() - startTime}ms)`);
return curatorOutput;
} catch (err) {
// Any stage failure: report over SSE, mark the row 'error', swallow the
// exception — the caller only ever sees the empty result.
const message = err instanceof Error ? err.message : String(err);
log(rec.id, `Pipeline error at stage "${currentStage}": ${message}`);
sseWrite({ stage: currentStage, status: 'error', data: { message } });
await db
.update(recommendations)
.set({ status: 'error' })
.where(eq(recommendations.id, rec.id));
return [];
}
}

View File

@@ -0,0 +1,38 @@
import type { FastifyInstance } from 'fastify';
import { eq } from 'drizzle-orm';
import { db } from '../db.js';
import { feedback } from '../db/schema.js';
/**
 * Registers feedback endpoints.
 *
 * POST /feedback upserts a rating keyed by tv_show_name (backed by the
 * unique index on that column); GET /feedback returns every stored entry.
 */
export default async function feedbackRoute(fastify: FastifyInstance) {
  // POST /feedback — upsert by tv_show_name
  fastify.post('/feedback', async (request, reply) => {
    const body = request.body as {
      tv_show_name?: unknown;
      stars?: unknown;
      feedback?: unknown;
    };
    // Fix: validate untrusted input up front instead of letting the
    // NOT NULL / integer column constraints surface as a 500 from the
    // database driver.
    if (typeof body.tv_show_name !== 'string' || body.tv_show_name.trim() === '') {
      return reply.code(400).send({ error: 'tv_show_name is required' });
    }
    if (typeof body.stars !== 'number' || !Number.isInteger(body.stars)) {
      return reply.code(400).send({ error: 'stars must be an integer' });
    }
    const comment = typeof body.feedback === 'string' ? body.feedback : '';
    await db
      .insert(feedback)
      .values({
        tv_show_name: body.tv_show_name,
        stars: body.stars,
        feedback: comment,
      })
      .onConflictDoUpdate({
        target: feedback.tv_show_name,
        set: {
          stars: body.stars,
          feedback: comment,
        },
      });
    return reply.code(201).send({ ok: true });
  });
  // GET /feedback — return all feedback entries
  fastify.get('/feedback', async (_request, reply) => {
    const rows = await db.select().from(feedback);
    return reply.send(rows);
  });
}

View File

@@ -0,0 +1,124 @@
import type { FastifyInstance } from 'fastify';
import { eq, desc } from 'drizzle-orm';
import { db } from '../db.js';
import { recommendations, feedback } from '../db/schema.js';
import { runPipeline } from '../pipelines/recommendation.js';
import type { SSEEvent } from '../types/agents.js';
/**
 * Registers the recommendation CRUD + SSE endpoints:
 *  - POST /recommendations           create a pending record, return its id
 *  - GET  /recommendations           list (id, title, status, created_at)
 *  - GET  /recommendations/:id       full record
 *  - GET  /recommendations/:id/stream  run the pipeline, streaming SSE events
 *  - POST /recommendations/:id/rerank  reset status so the client re-streams
 */
export default async function recommendationsRoute(fastify: FastifyInstance) {
// POST /recommendations — create record, return { id }
fastify.post('/recommendations', async (request, reply) => {
// NOTE(review): body fields are cast, not validated — an absent
// main_prompt silently becomes '' with title 'Untitled'.
const body = request.body as {
main_prompt: string;
liked_shows?: string;
disliked_shows?: string;
themes?: string;
};
// Derive a short display title from the first five words of the prompt.
const title = (body.main_prompt ?? '')
.trim()
.split(/\s+/)
.slice(0, 5)
.join(' ');
const [rec] = await db
.insert(recommendations)
.values({
title: title || 'Untitled',
main_prompt: body.main_prompt ?? '',
liked_shows: body.liked_shows ?? '',
disliked_shows: body.disliked_shows ?? '',
themes: body.themes ?? '',
status: 'pending',
})
.returning({ id: recommendations.id });
return reply.code(201).send({ id: rec?.id });
});
// GET /recommendations — list all
fastify.get('/recommendations', async (_request, reply) => {
// Projection keeps the listing light — the jsonb results column is omitted.
const rows = await db
.select({
id: recommendations.id,
title: recommendations.title,
status: recommendations.status,
created_at: recommendations.created_at,
})
.from(recommendations)
.orderBy(desc(recommendations.created_at));
return reply.send(rows);
});
// GET /recommendations/:id — full record
fastify.get('/recommendations/:id', async (request, reply) => {
const { id } = request.params as { id: string };
const [rec] = await db
.select()
.from(recommendations)
.where(eq(recommendations.id, id));
if (!rec) return reply.code(404).send({ error: 'Not found' });
return reply.send(rec);
});
// GET /recommendations/:id/stream — SSE pipeline stream
// Always fetches all current feedback and injects if present (supports rerank flow)
fastify.get('/recommendations/:id/stream', async (request, reply) => {
const { id } = request.params as { id: string };
const [rec] = await db
.select()
.from(recommendations)
.where(eq(recommendations.id, id));
if (!rec) return reply.code(404).send({ error: 'Not found' });
// Load all feedback to potentially inject as context
const feedbackRows = await db.select().from(feedback);
// Collapse every feedback row into a plain-text digest for the interpreter.
const feedbackContext =
feedbackRows.length > 0
? feedbackRows
.map(
(f) =>
`Show: "${f.tv_show_name}" — Rating: ${f.stars}/3 stars${f.feedback ? ` — Comment: ${f.feedback}` : ''}`,
)
.join('\n')
: undefined;
// Set SSE headers and hijack
reply.raw.setHeader('Content-Type', 'text/event-stream');
reply.raw.setHeader('Cache-Control', 'no-cache');
reply.raw.setHeader('Connection', 'keep-alive');
reply.raw.setHeader('Access-Control-Allow-Origin', '*');
reply.raw.flushHeaders();
// hijack() tells Fastify we own the raw response from here on — so the
// finally block below MUST end it ourselves.
reply.hijack();
const sseWrite = (event: SSEEvent) => {
reply.raw.write(`data: ${JSON.stringify(event)}\n\n`);
};
try {
// runPipeline reports its own errors via SSE and never throws for stage
// failures; the finally guards the stream against anything unexpected.
await runPipeline(rec, sseWrite, feedbackContext);
} finally {
reply.raw.end();
}
});
// POST /recommendations/:id/rerank — reset status so client can re-open SSE stream
fastify.post('/recommendations/:id/rerank', async (request, reply) => {
const { id } = request.params as { id: string };
const [rec] = await db
.select({ id: recommendations.id })
.from(recommendations)
.where(eq(recommendations.id, id));
if (!rec) return reply.code(404).send({ error: 'Not found' });
await db
.update(recommendations)
.set({ status: 'pending' })
.where(eq(recommendations.id, id));
return reply.send({ ok: true });
});
}

View File

@@ -0,0 +1,41 @@
/** Structured user preference profile produced by the interpreter agent. */
export interface InterpreterOutput {
liked: string[];
disliked: string[];
themes: string[];
character_preferences: string[];
tone: string[];
avoid: string[];
}
/** One brainstormed show plus the model's one-line justification. */
export interface RetrievalCandidate {
title: string;
reason: string;
}
/** Candidate pool returned by the retrieval agent. */
export interface RetrievalOutput {
candidates: RetrievalCandidate[];
}
/** Show titles grouped into the four ranking confidence buckets. */
export interface RankingOutput {
definitely_like: string[];
might_like: string[];
questionable: string[];
will_not_like: string[];
}
/** Human-readable bucket labels used in curated output. */
export type CuratorCategory = 'Definitely Like' | 'Might Like' | 'Questionable' | 'Will Not Like';
/** Final curated recommendation: title, explanation, and bucket label. */
export interface CuratorOutput {
title: string;
explanation: string;
category: CuratorCategory;
}
/** Pipeline stages reported over SSE ('complete' marks the end of a run). */
export type PipelineStage = 'interpreter' | 'retrieval' | 'ranking' | 'curator' | 'complete';
/** Per-stage lifecycle markers carried on each SSE event. */
export type SSEStatus = 'start' | 'done' | 'error';
/** Envelope for every server-sent event emitted by the pipeline. */
export interface SSEEvent {
stage: PipelineStage;
status: SSEStatus;
data?: unknown;
}