diff --git a/.env.example b/.env.example index 5748ec493..553b83f36 100644 --- a/.env.example +++ b/.env.example @@ -136,8 +136,12 @@ NEXT_PUBLIC_FEATURE_BILLING_ENABLED= NEXT_PUBLIC_DISABLE_SIGNUP= # [[AI]] +# AI Gateway +AI_GATEWAY_API_KEY="" # OPTIONAL: API key for Google Generative AI (Gemini). Get your key from https://ai.google.dev GOOGLE_GENERATIVE_AI_API_KEY="" +# OPTIONAL: Enable AI field detection debug mode to save preview images with bounding boxes +NEXT_PUBLIC_AI_DEBUG_PREVIEW= # [[E2E Tests]] E2E_TEST_AUTHENTICATE_USERNAME="Test User" diff --git a/.gitignore b/.gitignore index 9e622a76f..f21292b6b 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,6 @@ CLAUDE.md # agents .specs + +# ai debug previews +packages/assets/ai-previews/ diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx index ff6d55271..8518f6733 100644 --- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx +++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx @@ -299,17 +299,10 @@ export const EnvelopeEditorFieldsPage = () => { return; } - console.log('Successfully captured page 1 as PNG Blob:', { - size: `${(blob.size / 1024).toFixed(2)} KB`, - type: blob.type, - }); - console.log('Blob object:', blob); - - console.log('[Auto Add Fields] Sending image to AI endpoint...'); const formData = new FormData(); formData.append('image', blob, 'page-1.png'); - const response = await fetch('/api/ai/detect-object-and-draw', { + const response = await fetch('/api/ai/detect-form-fields', { method: 'POST', body: formData, credentials: 'include', @@ -320,10 +313,6 @@ export const EnvelopeEditorFieldsPage = () => { } const detectedFields = await response.json(); - console.log( - `[Auto Add Fields] Detected ${detectedFields.length} fields:`, - detectedFields, - ); if (!editorFields.selectedRecipient || !currentEnvelopeItem) { toast({ @@ -336,9 +325,12 @@ export const EnvelopeEditorFieldsPage = () => { const pageCanvasRefs = getPageCanvasRefs(1); if (!pageCanvasRefs) { - console.warn( - '[Auto Add Fields] Could not get page dimensions for minimum field enforcement', - ); + toast({ + title: t`Error`, + description: t`Failed to capture page. Please ensure the document is fully loaded.`, + variant: 'destructive', + }); + return; } let addedCount = 0; @@ -381,20 +373,19 @@ export const EnvelopeEditorFieldsPage = () => { }); addedCount++; } catch (error) { - console.error(`Failed to add ${fieldType} field:`, error); + toast({ + title: t`Error`, + description: t`Failed to add field. Please try again.`, + variant: 'destructive', + }); } } - console.log( - `[Auto Add Fields] Successfully added ${addedCount} fields to the document`, - ); - toast({ title: t`Success`, description: t`Added ${addedCount} fields to the document`, }); } catch (error) { - console.error('Auto add fields error:', error); toast({ title: t`Error`, description: t`An unexpected error occurred while capturing the page.`, diff --git a/apps/remix/server/api/ai.ts b/apps/remix/server/api/ai.ts index eae086d11..8f1f7a784 100644 --- a/apps/remix/server/api/ai.ts +++ b/apps/remix/server/api/ai.ts @@ -1,6 +1,5 @@ -import { sValidator } from '@hono/standard-validator'; import { generateObject } from 'ai'; -import { readFile, writeFile } from 'fs/promises'; +import { mkdir, writeFile } from 'fs/promises'; import { Hono } from 'hono'; import { join } from 'path'; import sharp from 'sharp'; @@ -8,13 +7,13 @@ import { Canvas, Image } from 'skia-canvas'; import { getSession } from '@documenso/auth/server/lib/utils/get-session'; import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error'; +import { env } from '@documenso/lib/utils/env'; import type { HonoEnv } from '../router'; import { - type TDetectObjectsResponse, - ZDetectObjectsAndDrawRequestSchema, - ZDetectObjectsRequestSchema, - ZDetectedObjectSchema, + type TDetectFormFieldsResponse, + ZDetectFormFieldsRequestSchema, + ZDetectedFormFieldSchema, } from './ai.types'; /** @@ -91,15 +90,14 @@ When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field) - This gives comfortable signing space while respecting the form layout`; -const runObjectDetection = async (imageBuffer: Buffer): Promise => { +const runFormFieldDetection = async (imageBuffer: Buffer): Promise => { const compressedImageBuffer = await resizeAndCompressImage(imageBuffer); const base64Image = compressedImageBuffer.toString('base64'); const result = await generateObject({ - // model: google('gemini-2.5-pro'), model: 'google/gemini-2.5-pro', output: 'array', - schema: ZDetectedObjectSchema, + schema: ZDetectedFormFieldSchema, messages: [ { role: 'user', @@ -120,74 +118,37 @@ const runObjectDetection = async (imageBuffer: Buffer): Promise() - .post('/detect-objects', sValidator('json', ZDetectObjectsRequestSchema), async (c) => { - try { - await getSession(c.req.raw); +export const aiRoute = new Hono().post('/detect-form-fields', async (c) => { + try { + await getSession(c.req.raw); - const { imagePath } = c.req.valid('json'); + const parsedBody = await c.req.parseBody(); + const rawImage = parsedBody.image; + const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage; + const parsed = ZDetectFormFieldsRequestSchema.safeParse({ image: imageCandidate }); - const imageBuffer = await readFile(imagePath); - const detectedObjects = await runObjectDetection(imageBuffer); - - return c.json(detectedObjects); - } catch (error) { - console.error('Object detection failed:', error); - - if (error instanceof AppError) { - throw error; - } - - throw new AppError(AppErrorCode.UNKNOWN_ERROR, { - message: 'Failed to detect objects', - userMessage: 'An error occurred while detecting objects. Please try again.', + if (!parsed.success) { + throw new AppError(AppErrorCode.INVALID_REQUEST, { + message: 'Image file is required', + userMessage: 'Please upload a valid image file.', }); } - }) - .post('/detect-object-and-draw', async (c) => { - try { - await getSession(c.req.raw); + const imageBuffer = Buffer.from(await parsed.data.image.arrayBuffer()); + const metadata = await sharp(imageBuffer).metadata(); + const imageWidth = metadata.width; + const imageHeight = metadata.height; - const parsedBody = await c.req.parseBody(); - const rawImage = parsedBody.image; - const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage; - const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate }); + if (!imageWidth || !imageHeight) { + throw new AppError(AppErrorCode.INVALID_REQUEST, { + message: 'Unable to extract image dimensions', + userMessage: 'The image file appears to be invalid or corrupted.', + }); + } - if (!parsed.success) { - throw new AppError(AppErrorCode.INVALID_REQUEST, { - message: 'Image file is required', - userMessage: 'Please upload a valid image file.', - }); - } - - const imageBlob = parsed.data.image; - const arrayBuffer = await imageBlob.arrayBuffer(); - const imageBuffer = Buffer.from(arrayBuffer); - const metadata = await sharp(imageBuffer).metadata(); - const imageWidth = metadata.width; - const imageHeight = metadata.height; - - console.log( - `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`, - ); - - if (!imageWidth || !imageHeight) { - throw new AppError(AppErrorCode.INVALID_REQUEST, { - message: 'Unable to extract image dimensions', - userMessage: 'The image file appears to be invalid or corrupted.', - }); - } - - console.log('[detect-object-and-draw] Compressing image for Gemini API...'); - console.log('[detect-object-and-draw] Calling Gemini API for form field detection...'); - const detectedObjects = await runObjectDetection(imageBuffer); - console.log('[detect-object-and-draw] Gemini API call completed'); - - console.log( - `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`, - ); + const detectedFields = await runFormFieldDetection(imageBuffer); + if (env('NEXT_PUBLIC_AI_DEBUG_PREVIEW') === 'true') { const padding = { left: 80, top: 20, right: 20, bottom: 40 }; const canvas = new Canvas( imageWidth + padding.left + padding.right, @@ -210,7 +171,6 @@ export const aiRoute = new Hono() ctx.stroke(); } - // Horizontal grid lines (every 100 units on 0-1000 scale) for (let i = 0; i <= 1000; i += 100) { const y = padding.top + (i / 1000) * imageHeight; ctx.beginPath(); @@ -221,8 +181,8 @@ export const aiRoute = new Hono() const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF']; - detectedObjects.forEach((obj, index) => { - const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000); + detectedFields.forEach((field, index) => { + const [ymin, xmin, ymax, xmax] = field.box_2d.map((coord) => coord / 1000); const x = xmin * imageWidth + padding.left; const y = ymin * imageHeight + padding.top; @@ -235,7 +195,7 @@ export const aiRoute = new Hono() ctx.fillStyle = colors[index % colors.length]; ctx.font = '20px Arial'; - ctx.fillText(obj.label, x, y - 5); + ctx.fillText(field.label, x, y - 5); }); ctx.strokeStyle = '#000000'; @@ -284,26 +244,25 @@ export const aiRoute = new Hono() .replace(/[-:]/g, '') .replace(/\..+/, '') .replace('T', '_'); - const outputFilename = `detected_objects_${timestamp}.png`; - const outputPath = join(process.cwd(), outputFilename); + const outputFilename = `detected_form_fields_${timestamp}.png`; + const debugDir = join(process.cwd(), 'packages', 'assets', 'ai-previews'); + const outputPath = join(debugDir, outputFilename); + + await mkdir(debugDir, { recursive: true }); - console.log('[detect-object-and-draw] Converting canvas to PNG buffer...'); const pngBuffer = await canvas.toBuffer('png'); - console.log(`[detect-object-and-draw] Saving to: ${outputPath}`); await writeFile(outputPath, pngBuffer); - - console.log('[detect-object-and-draw] Image saved successfully!'); - return c.json(detectedObjects); - } catch (error) { - console.error('Object detection and drawing failed:', error); - - if (error instanceof AppError) { - throw error; - } - - throw new AppError(AppErrorCode.UNKNOWN_ERROR, { - message: 'Failed to detect objects and draw', - userMessage: 'An error occurred while detecting and drawing objects. Please try again.', - }); } - }); + + return c.json(detectedFields); + } catch (error) { + if (error instanceof AppError) { + throw error; + } + + throw new AppError(AppErrorCode.UNKNOWN_ERROR, { + message: 'Failed to detect form fields and generate preview', + userMessage: 'An error occurred while detecting form fields. Please try again.', + }); + } +}); diff --git a/apps/remix/server/api/ai.types.ts b/apps/remix/server/api/ai.types.ts index f5715ae8e..0bcedd48e 100644 --- a/apps/remix/server/api/ai.types.ts +++ b/apps/remix/server/api/ai.types.ts @@ -11,7 +11,7 @@ export const ZGenerateTextResponseSchema = z.object({ export type TGenerateTextRequest = z.infer; export type TGenerateTextResponse = z.infer; -export const ZDetectedObjectSchema = z.object({ +export const ZDetectedFormFieldSchema = z.object({ box_2d: z .array(z.number()) .length(4) @@ -32,19 +32,12 @@ export const ZDetectedObjectSchema = z.object({ .describe('Documenso field type inferred from nearby label text or visual characteristics'), }); -export const ZDetectObjectsRequestSchema = z.object({ - imagePath: z.string().min(1, 'Image path is required'), - // TODO: Replace with file upload - reference files.ts pattern -}); - -export const ZDetectObjectsResponseSchema = z.array(ZDetectedObjectSchema); - -export type TDetectedObject = z.infer; -export type TDetectObjectsRequest = z.infer; -export type TDetectObjectsResponse = z.infer; - -export const ZDetectObjectsAndDrawRequestSchema = z.object({ +export const ZDetectFormFieldsRequestSchema = z.object({ image: z.instanceof(Blob, { message: 'Image file is required' }), }); -export type TDetectObjectsAndDrawRequest = z.infer; +export const ZDetectFormFieldsResponseSchema = z.array(ZDetectedFormFieldSchema); + +export type TDetectedFormField = z.infer; +export type TDetectFormFieldsRequest = z.infer; +export type TDetectFormFieldsResponse = z.infer; diff --git a/turbo.json b/turbo.json index d2217d211..7ac75b249 100644 --- a/turbo.json +++ b/turbo.json @@ -46,6 +46,7 @@ "NEXT_PUBLIC_WEBAPP_URL", "NEXT_PRIVATE_INTERNAL_WEBAPP_URL", "NEXT_PUBLIC_POSTHOG_KEY", + "NEXT_PUBLIC_AI_DEBUG_PREVIEW", "NEXT_PUBLIC_FEATURE_BILLING_ENABLED", "NEXT_PUBLIC_DISABLE_SIGNUP", "NEXT_PRIVATE_PLAIN_API_KEY",