From 29be66a844a702324af17eb0c1fb213b58199998 Mon Sep 17 00:00:00 2001 From: Ephraim Atta-Duncan Date: Wed, 29 Oct 2025 23:03:58 +0000 Subject: [PATCH] feat: add AI field auto-placement with canvas registry --- .../envelope-editor-fields-page-renderer.tsx | 22 + .../envelope-editor-fields-page.tsx | 228 +++++++- .../envelope-editor-upload-page.tsx | 5 +- apps/remix/server/api/ai.ts | 510 +++++++++--------- apps/remix/server/api/ai.types.ts | 8 +- .../client-only/utils/page-canvas-registry.ts | 110 ++++ .../field-renderer/field-constants.ts | 19 + 7 files changed, 623 insertions(+), 279 deletions(-) create mode 100644 packages/lib/client-only/utils/page-canvas-registry.ts create mode 100644 packages/lib/universal/field-renderer/field-constants.ts diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx index c75fb52a5..f0af450d8 100644 --- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx +++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx @@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer'; import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider'; import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider'; +import { + registerPageCanvas, + unregisterPageCanvas, +} from '@documenso/lib/client-only/utils/page-canvas-registry'; import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta'; import { MIN_FIELD_HEIGHT_PX, @@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() { [editorFields.localFields, pageContext.pageNumber], ); + /** + * Cleanup: Unregister canvas when component 
unmounts + */ + useEffect(() => { + return () => { + unregisterPageCanvas(pageContext.pageNumber); + }; + }, [pageContext.pageNumber]); + const handleResizeOrMove = (event: KonvaEventObject) => { const { current: container } = canvasElement; @@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() { currentStage.on('transformend', () => setIsFieldChanging(false)); currentPageLayer.batchDraw(); + + // Register this page's canvas references now that everything is initialized + if (canvasElement.current && currentStage) { + registerPageCanvas({ + pageNumber: pageContext.pageNumber, + pdfCanvas: canvasElement.current, + konvaStage: currentStage, + }); + } }; /** diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx index 4bd0915da..c04c9b85d 100644 --- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx +++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx @@ -1,4 +1,4 @@ -import { lazy, useEffect, useMemo } from 'react'; +import { lazy, useEffect, useMemo, useState } from 'react'; import type { MessageDescriptor } from '@lingui/core'; import { msg } from '@lingui/core/macro'; @@ -11,6 +11,10 @@ import { match } from 'ts-pattern'; import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider'; import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider'; +import { + compositePageToBlob, + getPageCanvasRefs, +} from '@documenso/lib/client-only/utils/page-canvas-registry'; import type { TCheckboxFieldMeta, TDateFieldMeta, @@ -24,12 +28,15 @@ import type { TSignatureFieldMeta, TTextFieldMeta, } from '@documenso/lib/types/field-meta'; +import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta'; import { canRecipientFieldsBeModified } from 
'@documenso/lib/utils/recipients'; import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out'; import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy'; import { Alert, AlertDescription } from '@documenso/ui/primitives/alert'; +import { Button } from '@documenso/ui/primitives/button'; import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector'; import { Separator } from '@documenso/ui/primitives/separator'; +import { useToast } from '@documenso/ui/primitives/use-toast'; import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form'; import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form'; @@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy( async () => import('./envelope-editor-fields-page-renderer'), ); +/** + * Enforces minimum field dimensions and centers the field when expanding to meet minimums. + * + * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures + * fields meet minimum usability requirements by expanding them to at least 30px height and + * 36px width, while keeping them centered on their original position. 
+ * + * @param params - Field dimensions and page size + * @param params.positionX - Field X position as percentage (0-100) + * @param params.positionY - Field Y position as percentage (0-100) + * @param params.width - Field width as percentage (0-100) + * @param params.height - Field height as percentage (0-100) + * @param params.pageWidth - Page width in pixels + * @param params.pageHeight - Page height in pixels + * @returns Adjusted field dimensions with minimums enforced and centered + * + * @example + * // AI detected a thin line: 0.3% height + * const adjusted = enforceMinimumFieldDimensions({ + * positionX: 20, positionY: 50, width: 30, height: 0.3, + * pageWidth: 800, pageHeight: 1100 + * }); + * // Result: height expanded to ~2.7% (30px), centered on original position + * + * @remarks + * Enforces minimum field dimensions with centered expansion. + * + * If a field is smaller than the minimum width or height, it will be expanded + * to meet the minimum requirements while staying centered on its original position. 
+ */ +const enforceMinimumFieldDimensions = (params: { + positionX: number; + positionY: number; + width: number; + height: number; + pageWidth: number; + pageHeight: number; +}): { + positionX: number; + positionY: number; + width: number; + height: number; +} => { + const MIN_HEIGHT_PX = 30; + const MIN_WIDTH_PX = 36; + + // Convert percentage to pixels to check against minimums + const widthPx = (params.width / 100) * params.pageWidth; + const heightPx = (params.height / 100) * params.pageHeight; + + let adjustedWidth = params.width; + let adjustedHeight = params.height; + let adjustedPositionX = params.positionX; + let adjustedPositionY = params.positionY; + + if (widthPx < MIN_WIDTH_PX) { + const centerXPx = (params.positionX / 100) * params.pageWidth + widthPx / 2; + adjustedWidth = (MIN_WIDTH_PX / params.pageWidth) * 100; + adjustedPositionX = ((centerXPx - MIN_WIDTH_PX / 2) / params.pageWidth) * 100; + + if (adjustedPositionX < 0) { + adjustedPositionX = 0; + } else if (adjustedPositionX + adjustedWidth > 100) { + adjustedPositionX = 100 - adjustedWidth; + } + } + + if (heightPx < MIN_HEIGHT_PX) { + const centerYPx = (params.positionY / 100) * params.pageHeight + heightPx / 2; + adjustedHeight = (MIN_HEIGHT_PX / params.pageHeight) * 100; + + adjustedPositionY = ((centerYPx - MIN_HEIGHT_PX / 2) / params.pageHeight) * 100; + + if (adjustedPositionY < 0) { + adjustedPositionY = 0; + } else if (adjustedPositionY + adjustedHeight > 100) { + adjustedPositionY = 100 - adjustedHeight; + } + } + + return { + positionX: adjustedPositionX, + positionY: adjustedPositionY, + width: adjustedWidth, + height: adjustedHeight, + }; +}; + const FieldSettingsTypeTranslations: Record = { [FieldType.SIGNATURE]: msg`Signature Settings`, [FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`, @@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => { const { currentEnvelopeItem } = useCurrentEnvelopeRender(); const { t } = useLingui(); + const { toast } = useToast(); + + 
const [isAutoAddingFields, setIsAutoAddingFields] = useState(false); const selectedField = useMemo( () => structuredClone(editorFields.selectedField), @@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => { selectedRecipientId={editorFields.selectedRecipient?.id ?? null} selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null} /> + + {/* Field details section. */} diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx index fa19bb6a1..caeea13e7 100644 --- a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx +++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx @@ -1,12 +1,11 @@ import { useMemo, useState } from 'react'; -import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'; import type { DropResult } from '@hello-pangea/dnd'; +import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd'; import { msg } from '@lingui/core/macro'; import { Trans, useLingui } from '@lingui/react/macro'; import { DocumentStatus } from '@prisma/client'; -import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react'; -import { X } from 'lucide-react'; +import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react'; import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone'; import { Link } from 'react-router'; diff --git a/apps/remix/server/api/ai.ts b/apps/remix/server/api/ai.ts index 4929a71e3..a37aec185 100644 --- a/apps/remix/server/api/ai.ts +++ b/apps/remix/server/api/ai.ts @@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error'; import type { HonoEnv } from '../router'; import { - type TDetectObjectsAndDrawResponse, type TDetectObjectsResponse, type TGenerateTextResponse, ZDetectObjectsAndDrawRequestSchema, @@ -41,6 +40,88 @@ async function 
resizeAndCompressImage(imageBuffer: Buffer): Promise { return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer(); } +const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform. + +IMPORTANT RULES: +1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data) +2. Analyze nearby text labels to determine the field type +3. Return bounding boxes for the fillable area only, NOT the label text +4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale + +FIELD TYPES TO DETECT: +• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____' +• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields +• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name' +• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:' +• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____' +• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes +• RADIO - Empty radio button circles (○) in groups, typically circular selection options +• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#' +• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select' +• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain + +DETECTION GUIDELINES: +- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type +- If you're uncertain which type fits best, default to TEXT +- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label +- Signature fields are often 
longer horizontal lines or larger boxes +- Date fields often show format hints or date separators (slashes, dashes) +- Look for visual patterns: underscores (____), horizontal lines, box outlines +- Return coordinates for the fillable area, not the descriptive label text + +COORDINATE SYSTEM: +- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale +- Top-left corner: ymin and xmin close to 0 +- Bottom-right corner: ymax and xmax close to 1000 +- Coordinates represent positions on a 1000x1000 grid overlaid on the image + +FIELD SIZING STRATEGY FOR LINE-BASED FIELDS: +When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields: +1. Analyze the visual context around the detected line: + - Look at the empty space ABOVE the detected line + - Observe the spacing to any text labels, headers, or other form elements above + - Assess what would be a reasonable field height to make the field clearly visible when filled +2. Expand UPWARD from the detected line to create a usable field: + - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge) + - Extend ymin (top) upward into the available whitespace + - Aim to use 60-80% of the clear whitespace above the line, while being reasonable + - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall) +3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units +4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0 +5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those +6. 
Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400: + - Available whitespace: 100 units + - Use 60-80% of that: 60-80 units + - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field) + - This gives comfortable signing space while respecting the form layout`; + +const runObjectDetection = async (imageBuffer: Buffer): Promise => { + const compressedImageBuffer = await resizeAndCompressImage(imageBuffer); + const base64Image = compressedImageBuffer.toString('base64'); + + const result = await generateObject({ + model: google('gemini-2.5-pro'), + schema: ZDetectObjectsResponseSchema, + messages: [ + { + role: 'user', + content: [ + { + type: 'image', + image: `data:image/jpeg;base64,${base64Image}`, + }, + { + type: 'text', + text: detectObjectsPrompt, + }, + ], + }, + ], + }); + + return result.object; +}; + export const aiRoute = new Hono() .use( '*', @@ -85,63 +166,9 @@ export const aiRoute = new Hono() const { imagePath } = c.req.valid('json'); const imageBuffer = await readFile(imagePath); - const compressedImageBuffer = await resizeAndCompressImage(imageBuffer); - const base64Image = compressedImageBuffer.toString('base64'); + const detectedObjects = await runObjectDetection(imageBuffer); - const result = await generateObject({ - model: google('gemini-2.5-pro'), - schema: ZDetectObjectsResponseSchema, - messages: [ - { - role: 'user', - content: [ - { - type: 'image', - image: `data:image/jpeg;base64,${base64Image}`, - }, - { - type: 'text', - text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform. - -IMPORTANT RULES: -1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data) -2. Analyze nearby text labels to determine the field type -3. Return bounding boxes for the fillable area only, NOT the label text -4. 
Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale - -FIELD TYPES TO DETECT: -• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____' -• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields -• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name' -• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:' -• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____' -• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes -• RADIO - Empty radio button circles (○) in groups, typically circular selection options -• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#' -• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select' -• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain - -DETECTION GUIDELINES: -- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type -- If you're uncertain which type fits best, default to TEXT -- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label -- Signature fields are often longer horizontal lines or larger boxes -- Date fields often show format hints or date separators (slashes, dashes) -- Look for visual patterns: underscores (____), horizontal lines, box outlines -- Return coordinates for the fillable area, not the descriptive label text - -COORDINATE SYSTEM: -- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale -- Top-left corner: ymin and xmin close to 0 -- Bottom-right corner: ymax and xmax close to 1000 -- Coordinates represent positions on a 1000x1000 grid 
overlaid on the image`, - }, - ], - }, - ], - }); - - return c.json(result.object); + return c.json(detectedObjects); } catch (error) { console.error('Object detection failed:', error); @@ -156,218 +183,165 @@ COORDINATE SYSTEM: } }) - .post( - '/detect-object-and-draw', - sValidator('json', ZDetectObjectsAndDrawRequestSchema), - async (c) => { - try { - await getSession(c.req.raw); + .post('/detect-object-and-draw', async (c) => { + try { + await getSession(c.req.raw); - const { imagePath } = c.req.valid('json'); + const parsedBody = await c.req.parseBody(); + const rawImage = parsedBody.image; + const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage; + const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate }); - console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`); - - const imageBuffer = await readFile(imagePath); - const metadata = await sharp(imageBuffer).metadata(); - const imageWidth = metadata.width; - const imageHeight = metadata.height; - - console.log( - `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`, - ); - - if (!imageWidth || !imageHeight) { - throw new AppError(AppErrorCode.INVALID_REQUEST, { - message: 'Unable to extract image dimensions', - userMessage: 'The image file appears to be invalid or corrupted.', - }); - } - - console.log('[detect-object-and-draw] Compressing image for Gemini API...'); - const compressedImageBuffer = await resizeAndCompressImage(imageBuffer); - const base64Image = compressedImageBuffer.toString('base64'); - - console.log('[detect-object-and-draw] Calling Gemini API for form field detection...'); - const result = await generateObject({ - model: google('gemini-2.5-pro'), - schema: ZDetectObjectsResponseSchema, - messages: [ - { - role: 'user', - content: [ - { - type: 'image', - image: `data:image/jpeg;base64,${base64Image}`, - }, - { - type: 'text', - text: `You are analyzing a form document image to detect fillable fields 
for the Documenso document signing platform. - -IMPORTANT RULES: -1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data) -2. Analyze nearby text labels to determine the field type -3. Return bounding boxes for the fillable area only, NOT the label text -4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale - -FIELD TYPES TO DETECT: -• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____' -• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields -• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name' -• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:' -• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____' -• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes -• RADIO - Empty radio button circles (○) in groups, typically circular selection options -• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#' -• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select' -• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain - -DETECTION GUIDELINES: -- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type -- If you're uncertain which type fits best, default to TEXT -- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label -- Signature fields are often longer horizontal lines or larger boxes -- Date fields often show format hints or date separators (slashes, dashes) -- Look for visual patterns: underscores (____), horizontal lines, box outlines -- Return coordinates 
for the fillable area, not the descriptive label text - -COORDINATE SYSTEM: -- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale -- Top-left corner: ymin and xmin close to 0 -- Bottom-right corner: ymax and xmax close to 1000 -- Coordinates represent positions on a 1000x1000 grid overlaid on the image`, - }, - ], - }, - ], - }); - console.log('[detect-object-and-draw] Gemini API call completed'); - - const detectedObjects = result.object; - - console.log( - `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`, - ); - - const padding = { left: 80, top: 20, right: 20, bottom: 40 }; - const canvas = new Canvas( - imageWidth + padding.left + padding.right, - imageHeight + padding.top + padding.bottom, - ); - const ctx = canvas.getContext('2d'); - - const img = new Image(); - img.src = imageBuffer; - ctx.drawImage(img, padding.left, padding.top); - - ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; - ctx.lineWidth = 1; - - for (let i = 0; i <= 1000; i += 100) { - const x = padding.left + (i / 1000) * imageWidth; - ctx.beginPath(); - ctx.moveTo(x, padding.top); - ctx.lineTo(x, imageHeight + padding.top); - ctx.stroke(); - } - - // Horizontal grid lines (every 100 units on 0-1000 scale) - for (let i = 0; i <= 1000; i += 100) { - const y = padding.top + (i / 1000) * imageHeight; - ctx.beginPath(); - ctx.moveTo(padding.left, y); - ctx.lineTo(imageWidth + padding.left, y); - ctx.stroke(); - } - - const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF']; - - detectedObjects.forEach((obj, index) => { - const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000); - - const x = xmin * imageWidth + padding.left; - const y = ymin * imageHeight + padding.top; - const width = (xmax - xmin) * imageWidth; - const height = (ymax - ymin) * imageHeight; - - ctx.strokeStyle = colors[index % colors.length]; - ctx.lineWidth = 5; - ctx.strokeRect(x, y, width, height); - - ctx.fillStyle = colors[index % colors.length]; - 
ctx.font = '20px Arial'; - ctx.fillText(obj.label, x, y - 5); - }); - - ctx.strokeStyle = '#000000'; - ctx.lineWidth = 1; - ctx.font = '26px Arial'; - - ctx.beginPath(); - ctx.moveTo(padding.left, padding.top); - ctx.lineTo(padding.left, imageHeight + padding.top); - ctx.stroke(); - - ctx.textAlign = 'right'; - ctx.textBaseline = 'middle'; - for (let i = 0; i <= 1000; i += 100) { - const y = padding.top + (i / 1000) * imageHeight; - ctx.fillStyle = '#000000'; - ctx.fillText(i.toString(), padding.left - 5, y); - - ctx.beginPath(); - ctx.moveTo(padding.left - 5, y); - ctx.lineTo(padding.left, y); - ctx.stroke(); - } - - ctx.beginPath(); - ctx.moveTo(padding.left, imageHeight + padding.top); - ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top); - ctx.stroke(); - - ctx.textAlign = 'center'; - ctx.textBaseline = 'top'; - for (let i = 0; i <= 1000; i += 100) { - const x = padding.left + (i / 1000) * imageWidth; - ctx.fillStyle = '#000000'; - ctx.fillText(i.toString(), x, imageHeight + padding.top + 5); - - ctx.beginPath(); - ctx.moveTo(x, imageHeight + padding.top); - ctx.lineTo(x, imageHeight + padding.top + 5); - ctx.stroke(); - } - - const now = new Date(); - const timestamp = now - .toISOString() - .replace(/[-:]/g, '') - .replace(/\..+/, '') - .replace('T', '_'); - const outputFilename = `detected_objects_${timestamp}.png`; - const outputPath = join(process.cwd(), outputFilename); - - console.log('[detect-object-and-draw] Converting canvas to PNG buffer...'); - const pngBuffer = await canvas.toBuffer('png'); - console.log(`[detect-object-and-draw] Saving to: ${outputPath}`); - await writeFile(outputPath, pngBuffer); - - console.log('[detect-object-and-draw] Image saved successfully!'); - return c.json({ - outputPath, - detectedObjects, - }); - } catch (error) { - console.error('Object detection and drawing failed:', error); - - if (error instanceof AppError) { - throw error; - } - - throw new AppError(AppErrorCode.UNKNOWN_ERROR, { - message: 'Failed to 
detect objects and draw', - userMessage: 'An error occurred while detecting and drawing objects. Please try again.', + if (!parsed.success) { + throw new AppError(AppErrorCode.INVALID_REQUEST, { + message: 'Image file is required', + userMessage: 'Please upload a valid image file.', }); } - }, - ); + + const imageBlob = parsed.data.image; + const arrayBuffer = await imageBlob.arrayBuffer(); + const imageBuffer = Buffer.from(arrayBuffer); + const metadata = await sharp(imageBuffer).metadata(); + const imageWidth = metadata.width; + const imageHeight = metadata.height; + + console.log( + `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`, + ); + + if (!imageWidth || !imageHeight) { + throw new AppError(AppErrorCode.INVALID_REQUEST, { + message: 'Unable to extract image dimensions', + userMessage: 'The image file appears to be invalid or corrupted.', + }); + } + + console.log('[detect-object-and-draw] Compressing image for Gemini API...'); + console.log('[detect-object-and-draw] Calling Gemini API for form field detection...'); + const detectedObjects = await runObjectDetection(imageBuffer); + console.log('[detect-object-and-draw] Gemini API call completed'); + + console.log( + `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`, + ); + + const padding = { left: 80, top: 20, right: 20, bottom: 40 }; + const canvas = new Canvas( + imageWidth + padding.left + padding.right, + imageHeight + padding.top + padding.bottom, + ); + const ctx = canvas.getContext('2d'); + + const img = new Image(); + img.src = imageBuffer; + ctx.drawImage(img, padding.left, padding.top); + + ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; + ctx.lineWidth = 1; + + for (let i = 0; i <= 1000; i += 100) { + const x = padding.left + (i / 1000) * imageWidth; + ctx.beginPath(); + ctx.moveTo(x, padding.top); + ctx.lineTo(x, imageHeight + padding.top); + ctx.stroke(); + } + + // Horizontal grid lines (every 100 units on 0-1000 scale) + for 
(let i = 0; i <= 1000; i += 100) { + const y = padding.top + (i / 1000) * imageHeight; + ctx.beginPath(); + ctx.moveTo(padding.left, y); + ctx.lineTo(imageWidth + padding.left, y); + ctx.stroke(); + } + + const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF']; + + detectedObjects.forEach((obj, index) => { + const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000); + + const x = xmin * imageWidth + padding.left; + const y = ymin * imageHeight + padding.top; + const width = (xmax - xmin) * imageWidth; + const height = (ymax - ymin) * imageHeight; + + ctx.strokeStyle = colors[index % colors.length]; + ctx.lineWidth = 5; + ctx.strokeRect(x, y, width, height); + + ctx.fillStyle = colors[index % colors.length]; + ctx.font = '20px Arial'; + ctx.fillText(obj.label, x, y - 5); + }); + + ctx.strokeStyle = '#000000'; + ctx.lineWidth = 1; + ctx.font = '26px Arial'; + + ctx.beginPath(); + ctx.moveTo(padding.left, padding.top); + ctx.lineTo(padding.left, imageHeight + padding.top); + ctx.stroke(); + + ctx.textAlign = 'right'; + ctx.textBaseline = 'middle'; + for (let i = 0; i <= 1000; i += 100) { + const y = padding.top + (i / 1000) * imageHeight; + ctx.fillStyle = '#000000'; + ctx.fillText(i.toString(), padding.left - 5, y); + + ctx.beginPath(); + ctx.moveTo(padding.left - 5, y); + ctx.lineTo(padding.left, y); + ctx.stroke(); + } + + ctx.beginPath(); + ctx.moveTo(padding.left, imageHeight + padding.top); + ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top); + ctx.stroke(); + + ctx.textAlign = 'center'; + ctx.textBaseline = 'top'; + for (let i = 0; i <= 1000; i += 100) { + const x = padding.left + (i / 1000) * imageWidth; + ctx.fillStyle = '#000000'; + ctx.fillText(i.toString(), x, imageHeight + padding.top + 5); + + ctx.beginPath(); + ctx.moveTo(x, imageHeight + padding.top); + ctx.lineTo(x, imageHeight + padding.top + 5); + ctx.stroke(); + } + + const now = new Date(); + const timestamp = now + .toISOString() + 
.replace(/[-:]/g, '') + .replace(/\..+/, '') + .replace('T', '_'); + const outputFilename = `detected_objects_${timestamp}.png`; + const outputPath = join(process.cwd(), outputFilename); + + console.log('[detect-object-and-draw] Converting canvas to PNG buffer...'); + const pngBuffer = await canvas.toBuffer('png'); + console.log(`[detect-object-and-draw] Saving to: ${outputPath}`); + await writeFile(outputPath, pngBuffer); + + console.log('[detect-object-and-draw] Image saved successfully!'); + return c.json(detectedObjects); + } catch (error) { + console.error('Object detection and drawing failed:', error); + + if (error instanceof AppError) { + throw error; + } + + throw new AppError(AppErrorCode.UNKNOWN_ERROR, { + message: 'Failed to detect objects and draw', + userMessage: 'An error occurred while detecting and drawing objects. Please try again.', + }); + } + }); diff --git a/apps/remix/server/api/ai.types.ts b/apps/remix/server/api/ai.types.ts index 1293d6c10..f5715ae8e 100644 --- a/apps/remix/server/api/ai.types.ts +++ b/apps/remix/server/api/ai.types.ts @@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer; export type TDetectObjectsResponse = z.infer; export const ZDetectObjectsAndDrawRequestSchema = z.object({ - imagePath: z.string().min(1, 'Image path is required'), -}); - -export const ZDetectObjectsAndDrawResponseSchema = z.object({ - outputPath: z.string().describe('Path to the generated image with bounding boxes'), - detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'), + image: z.instanceof(Blob, { message: 'Image file is required' }), }); export type TDetectObjectsAndDrawRequest = z.infer; -export type TDetectObjectsAndDrawResponse = z.infer; diff --git a/packages/lib/client-only/utils/page-canvas-registry.ts b/packages/lib/client-only/utils/page-canvas-registry.ts new file mode 100644 index 000000000..a96b06432 --- /dev/null +++ b/packages/lib/client-only/utils/page-canvas-registry.ts @@ -0,0 +1,110 @@ 
+import type Konva from 'konva'; + +/** + * Represents canvas references for a specific PDF page. + */ +export interface PageCanvasRefs { + /** The page number (1-indexed) */ + pageNumber: number; + /** The canvas element containing the rendered PDF */ + pdfCanvas: HTMLCanvasElement; + /** The Konva stage containing field overlays */ + konvaStage: Konva.Stage; +} + +/** + * Module-level registry to store canvas references by page number. + * This allows any component to access page canvases without prop drilling. + */ +const pageCanvasRegistry = new Map(); + +/** + * Register a page's canvas references. + * Call this when a page renderer mounts and has valid canvas refs. + * + * @param refs - The canvas references to register + */ +export const registerPageCanvas = (refs: PageCanvasRefs): void => { + pageCanvasRegistry.set(refs.pageNumber, refs); +}; + +/** + * Unregister a page's canvas references. + * Call this when a page renderer unmounts to prevent memory leaks. + * + * @param pageNumber - The page number to unregister + */ +export const unregisterPageCanvas = (pageNumber: number): void => { + pageCanvasRegistry.delete(pageNumber); +}; + +/** + * Get canvas references for a specific page. + * + * @param pageNumber - The page number to retrieve + * @returns The canvas references, or undefined if not registered + */ +export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined => { + return pageCanvasRegistry.get(pageNumber); +}; + +/** + * Get all registered page numbers. + * + * @returns Array of page numbers currently registered + */ +export const getRegisteredPageNumbers = (): number[] => { + return Array.from(pageCanvasRegistry.keys()).sort((a, b) => a - b); +}; + +/** + * Composite a PDF page with its field overlays into a single PNG Blob. + * This creates a temporary canvas, draws the PDF canvas first (background), + * then draws the Konva canvas on top (field overlays). 
+ *
+ * @param pageNumber - The page number to composite (1-indexed)
+ * @returns Promise that resolves to a PNG Blob, or null if page not found or compositing fails
+ */
+export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
+  const refs = getPageCanvasRefs(pageNumber);
+
+  if (!refs) {
+    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
+    return null;
+  }
+
+  try {
+    // Create temporary canvas with same dimensions as PDF canvas
+    const tempCanvas = document.createElement('canvas');
+    tempCanvas.width = refs.pdfCanvas.width;
+    tempCanvas.height = refs.pdfCanvas.height;
+
+    const ctx = tempCanvas.getContext('2d');
+    if (!ctx) {
+      console.error('Failed to get 2D context for temporary canvas');
+      return null;
+    }
+
+    // Draw PDF canvas first (background layer)
+    ctx.drawImage(refs.pdfCanvas, 0, 0);
+
+    // Get Konva canvas and draw on top (field overlays)
+    // Note: Konva's toCanvas() returns a new canvas with all layers rendered
+    const konvaCanvas = refs.konvaStage.toCanvas();
+    ctx.drawImage(konvaCanvas, 0, 0);
+
+    // Convert to PNG Blob; awaited so a toBlob failure reaches the catch below and yields null
+    return await new Promise<Blob>((resolve, reject) => {
+      tempCanvas.toBlob((blob) => {
+        if (blob) {
+          resolve(blob);
+        } else {
+          reject(new Error('Failed to convert canvas to blob'));
+        }
+      }, 'image/png');
+    });
+  } catch (error) {
+    console.error(`Error compositing page ${pageNumber}:`, error);
+    return null;
+  }
+};
diff --git a/packages/lib/universal/field-renderer/field-constants.ts b/packages/lib/universal/field-renderer/field-constants.ts
new file mode 100644
index 000000000..4473b439e
--- /dev/null
+++ b/packages/lib/universal/field-renderer/field-constants.ts
@@ -0,0 +1,19 @@
+/**
+ * Shared constants for field dimension enforcement.
+ *
+ * These constants ensure consistency between:
+ * 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
+ * 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
+ */
+
+/**
+ * Minimum field height in pixels.
+ * Fields smaller than this will be expanded to meet minimum usability requirements.
+ */
+export const MIN_FIELD_HEIGHT_PX = 30;
+
+/**
+ * Minimum field width in pixels.
+ * Fields smaller than this will be expanded to meet minimum usability requirements.
+ */
+export const MIN_FIELD_WIDTH_PX = 36;