feat: add AI field auto-placement with canvas registry

2025-11-14 00:32:43 +10:00 · 2025-10-29 23:03:58 +00:00
parent 94098bd762
commit 29be66a844
7 changed files with 623 additions and 279 deletions
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi
 import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
 import {
  registerPageCanvas,
  unregisterPageCanvas,
 } from '@documenso/lib/client-only/utils/page-canvas-registry';
 import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import {
  MIN_FIELD_HEIGHT_PX,
@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
    [editorFields.localFields, pageContext.pageNumber],
  );
  /**
   * Cleanup: unregister this page's canvas from the page canvas registry.
   *
   * The cleanup runs when the component unmounts and also whenever
   * `pageContext.pageNumber` changes (it is the effect's only dependency); in the
   * latter case the page number captured by the previous render is the one that
   * gets unregistered.
   */
  useEffect(() => {
    return () => {
      unregisterPageCanvas(pageContext.pageNumber);
    };
  }, [pageContext.pageNumber]);
  const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
    const { current: container } = canvasElement;
@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
    currentStage.on('transformend', () => setIsFieldChanging(false));
    currentPageLayer.batchDraw();
    // Register this page's canvas references now that everything is initialized
    if (canvasElement.current && currentStage) {
      registerPageCanvas({
        pageNumber: pageContext.pageNumber,
        pdfCanvas: canvasElement.current,
        konvaStage: currentStage,
      });
    }
  };
  /**
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
@ -1,4 +1,4 @@
-import { lazy, useEffect, useMemo } from 'react';
+import { lazy, useEffect, useMemo, useState } from 'react';
 import type { MessageDescriptor } from '@lingui/core';
 import { msg } from '@lingui/core/macro';
@ -11,6 +11,10 @@ import { match } from 'ts-pattern';
 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
 import {
  compositePageToBlob,
  getPageCanvasRefs,
 } from '@documenso/lib/client-only/utils/page-canvas-registry';
 import type {
  TCheckboxFieldMeta,
  TDateFieldMeta,
@ -24,12 +28,15 @@ import type {
  TSignatureFieldMeta,
  TTextFieldMeta,
 } from '@documenso/lib/types/field-meta';
 import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
 import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
 import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
 import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
 import { Button } from '@documenso/ui/primitives/button';
 import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
 import { Separator } from '@documenso/ui/primitives/separator';
 import { useToast } from '@documenso/ui/primitives/use-toast';
 import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
 import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy(
  async () => import('./envelope-editor-fields-page-renderer'),
 );
 /**
  * Enforces minimum field dimensions, expanding around the field's original centre.
  *
  * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures
  * fields meet minimum usability requirements by expanding them to at least 30px height and
  * 36px width, while keeping them centred on their original position. After expanding, the
  * field is shifted back inside the page if it would overflow an edge.
  *
  * Each axis is handled independently, and an axis that already meets its minimum is
  * returned untouched (it is not clamped to the page).
  *
  * Degenerate page sizes are handled explicitly:
  * - a page dimension that is not a positive number (e.g. a canvas that has not been sized
  *   yet) leaves that axis unchanged instead of dividing by zero;
  * - a page dimension smaller than the pixel minimum makes the field span the whole axis
  *   (position 0, size 100) instead of producing a negative position.
  *
  * @param params - Field dimensions and page size
  * @param params.positionX - Field X position as percentage (0-100)
  * @param params.positionY - Field Y position as percentage (0-100)
  * @param params.width - Field width as percentage (0-100)
  * @param params.height - Field height as percentage (0-100)
  * @param params.pageWidth - Page width in pixels
  * @param params.pageHeight - Page height in pixels
  * @returns Adjusted field position and size, as percentages of the page
  *
  * @example
  * // AI detected a thin line: 0.3% height
  * const adjusted = enforceMinimumFieldDimensions({
  *   positionX: 20, positionY: 50, width: 30, height: 0.3,
  *   pageWidth: 800, pageHeight: 1100
  * });
  * // Result: height expanded to ~2.7% (30px), centered on original position
  */
 const enforceMinimumFieldDimensions = (params: {
   positionX: number;
   positionY: number;
   width: number;
   height: number;
   pageWidth: number;
   pageHeight: number;
 }): {
   positionX: number;
   positionY: number;
   width: number;
   height: number;
 } => {
   const MIN_HEIGHT_PX = 30;
   const MIN_WIDTH_PX = 36;

   // Applies the minimum to a single axis. `position` and `size` are percentages of the
   // page, `pageSizePx` and `minSizePx` are pixels.
   const expandAxis = (
     position: number,
     size: number,
     pageSizePx: number,
     minSizePx: number,
   ): { position: number; size: number } => {
     // Without a usable page size the percentage <-> pixel conversion is meaningless
     // (division by zero), so leave the axis as detected.
     if (!(pageSizePx > 0)) {
       return { position, size };
     }

     const sizePx = (size / 100) * pageSizePx;

     // Written as a negated `<` so a NaN size falls through unchanged.
     if (!(sizePx < minSizePx)) {
       return { position, size };
     }

     // The page itself is smaller than the minimum: the best we can do is fill the axis.
     if (minSizePx >= pageSizePx) {
       return { position: 0, size: 100 };
     }

     const centerPx = (position / 100) * pageSizePx + sizePx / 2;
     const expandedSize = (minSizePx / pageSizePx) * 100;
     const centeredPosition = ((centerPx - minSizePx / 2) / pageSizePx) * 100;

     // Keep the expanded field fully on the page.
     return {
       position: Math.min(Math.max(centeredPosition, 0), 100 - expandedSize),
       size: expandedSize,
     };
   };

   const horizontal = expandAxis(params.positionX, params.width, params.pageWidth, MIN_WIDTH_PX);
   const vertical = expandAxis(params.positionY, params.height, params.pageHeight, MIN_HEIGHT_PX);

   return {
     positionX: horizontal.position,
     positionY: vertical.position,
     width: horizontal.size,
     height: vertical.size,
   };
 };
 const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
  [FieldType.SIGNATURE]: msg`Signature Settings`,
  [FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => {
  const { currentEnvelopeItem } = useCurrentEnvelopeRender();
  const { t } = useLingui();
  const { toast } = useToast();
  const [isAutoAddingFields, setIsAutoAddingFields] = useState(false);
  const selectedField = useMemo(
    () => structuredClone(editorFields.selectedField),
@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => {
              selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
              selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
            />
            {/*
              Auto add fields: captures page 1 as an image, posts it to the AI detection
              endpoint and adds one field per valid detection for the selected recipient.
            */}
            <Button
              className="mt-4 w-full"
              variant="outline"
              disabled={isAutoAddingFields}
              onClick={async () => {
                // Validate the selection up front so we never capture the page or call the
                // AI endpoint for a result that could not be placed anyway.
                const recipient = editorFields.selectedRecipient;
                const envelopeItem = currentEnvelopeItem;

                if (!recipient || !envelopeItem) {
                  toast({
                    title: t`Warning`,
                    description: t`Please select a recipient before adding fields.`,
                    variant: 'destructive',
                  });
                  return;
                }

                setIsAutoAddingFields(true);

                try {
                  const blob = await compositePageToBlob(1);

                  if (!blob) {
                    toast({
                      title: t`Error`,
                      description: t`Failed to capture page. Please ensure the document is fully loaded.`,
                      variant: 'destructive',
                    });
                    return;
                  }

                  console.log('Successfully captured page 1 as PNG Blob:', {
                    size: `${(blob.size / 1024).toFixed(2)} KB`,
                    type: blob.type,
                  });
                  console.log('Blob object:', blob);
                  console.log('[Auto Add Fields] Sending image to AI endpoint...');

                  const formData = new FormData();
                  formData.append('image', blob, 'page-1.png');

                  const response = await fetch('/api/ai/detect-object-and-draw', {
                    method: 'POST',
                    body: formData,
                    credentials: 'include',
                  });

                  if (!response.ok) {
                    throw new Error(`AI detection failed: ${response.statusText}`);
                  }

                  // The payload comes from the network: treat it as unknown until checked.
                  const detectedFields: unknown = await response.json();

                  if (!Array.isArray(detectedFields)) {
                    throw new Error('AI detection returned an unexpected response shape');
                  }

                  console.log(
                    `[Auto Add Fields] Detected ${detectedFields.length} fields:`,
                    detectedFields,
                  );

                  const pageCanvasRefs = getPageCanvasRefs(1);

                  if (!pageCanvasRefs) {
                    console.warn(
                      '[Auto Add Fields] Could not get page dimensions for minimum field enforcement',
                    );
                  }

                  // NOTE(review): assumes the endpoint labels detections with FieldType
                  // names (the original code cast the label directly) - confirm against
                  // the response schema.
                  const knownFieldTypes = new Set<string>(Object.values(FieldType));

                  let addedCount = 0;

                  for (const detected of detectedFields) {
                    const label: unknown = detected?.label;
                    const box: unknown = detected?.box_2d;

                    // Skip detections we cannot turn into a well-formed field rather than
                    // creating one with an unknown type or NaN geometry.
                    if (typeof label !== 'string' || !knownFieldTypes.has(label)) {
                      console.warn('[Auto Add Fields] Skipping detection with unknown label:', label);
                      continue;
                    }

                    if (
                      !Array.isArray(box) ||
                      box.length !== 4 ||
                      !box.every((coord) => typeof coord === 'number' && Number.isFinite(coord))
                    ) {
                      console.warn('[Auto Add Fields] Skipping detection with invalid box:', box);
                      continue;
                    }

                    const fieldType = label as FieldType;
                    const [ymin, xmin, ymax, xmax] = box as [number, number, number, number];

                    // Boxes are on a 0-1000 scale; fields are positioned in percentages.
                    let positionX = (xmin / 1000) * 100;
                    let positionY = (ymin / 1000) * 100;
                    let width = ((xmax - xmin) / 1000) * 100;
                    let height = ((ymax - ymin) / 1000) * 100;

                    if (pageCanvasRefs) {
                      const adjusted = enforceMinimumFieldDimensions({
                        positionX,
                        positionY,
                        width,
                        height,
                        pageWidth: pageCanvasRefs.pdfCanvas.width,
                        pageHeight: pageCanvasRefs.pdfCanvas.height,
                      });

                      positionX = adjusted.positionX;
                      positionY = adjusted.positionY;
                      width = adjusted.width;
                      height = adjusted.height;
                    }

                    try {
                      editorFields.addField({
                        envelopeItemId: envelopeItem.id,
                        page: 1,
                        type: fieldType,
                        positionX,
                        positionY,
                        width,
                        height,
                        recipientId: recipient.id,
                        fieldMeta: structuredClone(FIELD_META_DEFAULT_VALUES[fieldType]),
                      });
                      addedCount++;
                    } catch (error) {
                      // One bad field should not abort the remaining detections.
                      console.error(`Failed to add ${fieldType} field:`, error);
                    }
                  }

                  console.log(
                    `[Auto Add Fields] Successfully added ${addedCount} fields to the document`,
                  );

                  toast({
                    title: t`Success`,
                    description: t`Added ${addedCount} fields to the document`,
                  });
                } catch (error) {
                  console.error('Auto add fields error:', error);

                  toast({
                    title: t`Error`,
                    description: t`An unexpected error occurred while capturing the page.`,
                    variant: 'destructive',
                  });
                } finally {
                  setIsAutoAddingFields(false);
                }
              }}
            >
              {isAutoAddingFields ? <Trans>Processing...</Trans> : <Trans>Auto add fields</Trans>}
            </Button>
          </section>
          {/* Field details section. */}
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
@ -1,12 +1,11 @@
 import { useMemo, useState } from 'react';
 import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import type { DropResult } from '@hello-pangea/dnd';
 import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import { msg } from '@lingui/core/macro';
 import { Trans, useLingui } from '@lingui/react/macro';
 import { DocumentStatus } from '@prisma/client';
-import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react';
+import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react';
 import { X } from 'lucide-react';
 import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
 import { Link } from 'react-router';
--- a/apps/remix/server/api/ai.ts
+++ b/apps/remix/server/api/ai.ts
@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
 import type { HonoEnv } from '../router';
 import {
  type TDetectObjectsAndDrawResponse,
  type TDetectObjectsResponse,
  type TGenerateTextResponse,
  ZDetectObjectsAndDrawRequestSchema,
@ -41,6 +40,88 @@ async function resizeAndCompressImage(imageBuffer: Buffer): Promise<Buffer> {
  return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
 }
 /**
  * Prompt text sent together with the page image by `runObjectDetection`.
  *
  * It asks the model for the empty, fillable fields of a form, each labelled with one of
  * the listed field types and a bounding box in `[ymin, xmin, ymax, xmax]` order on a
  * 0-1000 scale, and instructs it to expand thin line-based fields upward.
  *
  * This is a runtime string: any edit changes what the model is asked to do.
  */
 const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
 IMPORTANT RULES:
 1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
 2. Analyze nearby text labels to determine the field type
 3. Return bounding boxes for the fillable area only, NOT the label text
 4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
 FIELD TYPES TO DETECT:
 • SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
 • INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
 • NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
 • EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
 • DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
 • CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
 • RADIO - Empty radio button circles (○) in groups, typically circular selection options
 • NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
 • DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
 • TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
 DETECTION GUIDELINES:
 - Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
 - If you're uncertain which type fits best, default to TEXT
 - For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
 - Signature fields are often longer horizontal lines or larger boxes
 - Date fields often show format hints or date separators (slashes, dashes)
 - Look for visual patterns: underscores (____), horizontal lines, box outlines
 - Return coordinates for the fillable area, not the descriptive label text
 COORDINATE SYSTEM:
 - [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
 - Top-left corner: ymin and xmin close to 0
 - Bottom-right corner: ymax and xmax close to 1000
 - Coordinates represent positions on a 1000x1000 grid overlaid on the image
 FIELD SIZING STRATEGY FOR LINE-BASED FIELDS:
 When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields:
 1. Analyze the visual context around the detected line:
   - Look at the empty space ABOVE the detected line
   - Observe the spacing to any text labels, headers, or other form elements above
   - Assess what would be a reasonable field height to make the field clearly visible when filled
 2. Expand UPWARD from the detected line to create a usable field:
   - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge)
   - Extend ymin (top) upward into the available whitespace
   - Aim to use 60-80% of the clear whitespace above the line, while being reasonable
   - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall)
 3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units
 4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0
 5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those
 6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400:
   - Available whitespace: 100 units
   - Use 60-80% of that: 60-80 units
   - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field)
   - This gives comfortable signing space while respecting the form layout`;
 /**
  * Runs form-field detection on a single image.
  *
  * The image is first passed through `resizeAndCompressImage`, then sent as a base64 JPEG
  * data URI together with `detectObjectsPrompt` in one user message. The model output is
  * constrained by `ZDetectObjectsResponseSchema`.
  *
  * @param imageBuffer - Raw bytes of the image to analyse
  * @returns The structured detection result produced by `generateObject`
  */
 const runObjectDetection = async (imageBuffer: Buffer): Promise<TDetectObjectsResponse> => {
   const jpegBytes = await resizeAndCompressImage(imageBuffer);
   const imageDataUri = `data:image/jpeg;base64,${jpegBytes.toString('base64')}`;

   const { object: detections } = await generateObject({
     model: google('gemini-2.5-pro'),
     schema: ZDetectObjectsResponseSchema,
     messages: [
       {
         role: 'user',
         content: [
           { type: 'image', image: imageDataUri },
           { type: 'text', text: detectObjectsPrompt },
         ],
       },
     ],
   });

   return detections;
 };
 export const aiRoute = new Hono<HonoEnv>()
  .use(
    '*',
@ -85,63 +166,9 @@ export const aiRoute = new Hono<HonoEnv>()
      const { imagePath } = c.req.valid('json');
      const imageBuffer = await readFile(imagePath);
-      const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
+      const detectedObjects = await runObjectDetection(imageBuffer);
      const base64Image = compressedImageBuffer.toString('base64');
-      const result = await generateObject({
+      return c.json<TDetectObjectsResponse>(detectedObjects);
        model: google('gemini-2.5-pro'),
        schema: ZDetectObjectsResponseSchema,
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'image',
                image: `data:image/jpeg;base64,${base64Image}`,
              },
              {
                type: 'text',
                text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
 IMPORTANT RULES:
 1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
 2. Analyze nearby text labels to determine the field type
 3. Return bounding boxes for the fillable area only, NOT the label text
 4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
 FIELD TYPES TO DETECT:
 • SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
 • INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
 • NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
 • EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
 • DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
 • CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
 • RADIO - Empty radio button circles (○) in groups, typically circular selection options
 • NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
 • DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
 • TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
 DETECTION GUIDELINES:
 - Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
 - If you're uncertain which type fits best, default to TEXT
 - For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
 - Signature fields are often longer horizontal lines or larger boxes
 - Date fields often show format hints or date separators (slashes, dashes)
 - Look for visual patterns: underscores (____), horizontal lines, box outlines
 - Return coordinates for the fillable area, not the descriptive label text
 COORDINATE SYSTEM:
 - [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
 - Top-left corner: ymin and xmin close to 0
 - Bottom-right corner: ymax and xmax close to 1000
 - Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
              },
            ],
          },
        ],
      });
      return c.json<TDetectObjectsResponse>(result.object);
    } catch (error) {
      console.error('Object detection failed:', error);
@ -156,218 +183,165 @@ COORDINATE SYSTEM:
    }
  })
-  .post(
+  .post('/detect-object-and-draw', async (c) => {
-    '/detect-object-and-draw',
+    try {
-    sValidator('json', ZDetectObjectsAndDrawRequestSchema),
+      await getSession(c.req.raw);
    async (c) => {
      try {
        await getSession(c.req.raw);
-        const { imagePath } = c.req.valid('json');
+      const parsedBody = await c.req.parseBody();
      const rawImage = parsedBody.image;
      const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage;
      const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate });
-        console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`);
+      if (!parsed.success) {
-
+        throw new AppError(AppErrorCode.INVALID_REQUEST, {
-        const imageBuffer = await readFile(imagePath);
+          message: 'Image file is required',
-        const metadata = await sharp(imageBuffer).metadata();
+          userMessage: 'Please upload a valid image file.',
        const imageWidth = metadata.width;
        const imageHeight = metadata.height;
        console.log(
          `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
        );
        if (!imageWidth || !imageHeight) {
          throw new AppError(AppErrorCode.INVALID_REQUEST, {
            message: 'Unable to extract image dimensions',
            userMessage: 'The image file appears to be invalid or corrupted.',
          });
        }
        console.log('[detect-object-and-draw] Compressing image for Gemini API...');
        const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
        const base64Image = compressedImageBuffer.toString('base64');
        console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
        const result = await generateObject({
          model: google('gemini-2.5-pro'),
          schema: ZDetectObjectsResponseSchema,
          messages: [
            {
              role: 'user',
              content: [
                {
                  type: 'image',
                  image: `data:image/jpeg;base64,${base64Image}`,
                },
                {
                  type: 'text',
                  text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
 IMPORTANT RULES:
 1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
 2. Analyze nearby text labels to determine the field type
 3. Return bounding boxes for the fillable area only, NOT the label text
 4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
 FIELD TYPES TO DETECT:
 • SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
 • INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
 • NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
 • EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
 • DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
 • CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
 • RADIO - Empty radio button circles (○) in groups, typically circular selection options
 • NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
 • DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
 • TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
 DETECTION GUIDELINES:
 - Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
 - If you're uncertain which type fits best, default to TEXT
 - For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
 - Signature fields are often longer horizontal lines or larger boxes
 - Date fields often show format hints or date separators (slashes, dashes)
 - Look for visual patterns: underscores (____), horizontal lines, box outlines
 - Return coordinates for the fillable area, not the descriptive label text
 COORDINATE SYSTEM:
 - [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
 - Top-left corner: ymin and xmin close to 0
 - Bottom-right corner: ymax and xmax close to 1000
 - Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
                },
              ],
            },
          ],
        });
        console.log('[detect-object-and-draw] Gemini API call completed');
        const detectedObjects = result.object;
        console.log(
          `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
        );
        const padding = { left: 80, top: 20, right: 20, bottom: 40 };
        const canvas = new Canvas(
          imageWidth + padding.left + padding.right,
          imageHeight + padding.top + padding.bottom,
        );
        const ctx = canvas.getContext('2d');
        const img = new Image();
        img.src = imageBuffer;
        ctx.drawImage(img, padding.left, padding.top);
        ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
        ctx.lineWidth = 1;
        for (let i = 0; i <= 1000; i += 100) {
          const x = padding.left + (i / 1000) * imageWidth;
          ctx.beginPath();
          ctx.moveTo(x, padding.top);
          ctx.lineTo(x, imageHeight + padding.top);
          ctx.stroke();
        }
        // Horizontal grid lines (every 100 units on 0-1000 scale)
        for (let i = 0; i <= 1000; i += 100) {
          const y = padding.top + (i / 1000) * imageHeight;
          ctx.beginPath();
          ctx.moveTo(padding.left, y);
          ctx.lineTo(imageWidth + padding.left, y);
          ctx.stroke();
        }
        const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
        detectedObjects.forEach((obj, index) => {
          const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
          const x = xmin * imageWidth + padding.left;
          const y = ymin * imageHeight + padding.top;
          const width = (xmax - xmin) * imageWidth;
          const height = (ymax - ymin) * imageHeight;
          ctx.strokeStyle = colors[index % colors.length];
          ctx.lineWidth = 5;
          ctx.strokeRect(x, y, width, height);
          ctx.fillStyle = colors[index % colors.length];
          ctx.font = '20px Arial';
          ctx.fillText(obj.label, x, y - 5);
        });
        ctx.strokeStyle = '#000000';
        ctx.lineWidth = 1;
        ctx.font = '26px Arial';
        ctx.beginPath();
        ctx.moveTo(padding.left, padding.top);
        ctx.lineTo(padding.left, imageHeight + padding.top);
        ctx.stroke();
        ctx.textAlign = 'right';
        ctx.textBaseline = 'middle';
        for (let i = 0; i <= 1000; i += 100) {
          const y = padding.top + (i / 1000) * imageHeight;
          ctx.fillStyle = '#000000';
          ctx.fillText(i.toString(), padding.left - 5, y);
          ctx.beginPath();
          ctx.moveTo(padding.left - 5, y);
          ctx.lineTo(padding.left, y);
          ctx.stroke();
        }
        ctx.beginPath();
        ctx.moveTo(padding.left, imageHeight + padding.top);
        ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
        ctx.stroke();
        ctx.textAlign = 'center';
        ctx.textBaseline = 'top';
        for (let i = 0; i <= 1000; i += 100) {
          const x = padding.left + (i / 1000) * imageWidth;
          ctx.fillStyle = '#000000';
          ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
          ctx.beginPath();
          ctx.moveTo(x, imageHeight + padding.top);
          ctx.lineTo(x, imageHeight + padding.top + 5);
          ctx.stroke();
        }
        const now = new Date();
        const timestamp = now
          .toISOString()
          .replace(/[-:]/g, '')
          .replace(/\..+/, '')
          .replace('T', '_');
        const outputFilename = `detected_objects_${timestamp}.png`;
        const outputPath = join(process.cwd(), outputFilename);
        console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
        const pngBuffer = await canvas.toBuffer('png');
        console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
        await writeFile(outputPath, pngBuffer);
        console.log('[detect-object-and-draw] Image saved successfully!');
        return c.json<TDetectObjectsAndDrawResponse>({
          outputPath,
          detectedObjects,
        });
      } catch (error) {
        console.error('Object detection and drawing failed:', error);
        if (error instanceof AppError) {
          throw error;
        }
        throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
          message: 'Failed to detect objects and draw',
          userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
        });
      }
-    },
+
-  );
+      const imageBlob = parsed.data.image;
      const arrayBuffer = await imageBlob.arrayBuffer();
      const imageBuffer = Buffer.from(arrayBuffer);
      const metadata = await sharp(imageBuffer).metadata();
      const imageWidth = metadata.width;
      const imageHeight = metadata.height;
      console.log(
        `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
      );
      if (!imageWidth || !imageHeight) {
        throw new AppError(AppErrorCode.INVALID_REQUEST, {
          message: 'Unable to extract image dimensions',
          userMessage: 'The image file appears to be invalid or corrupted.',
        });
      }
      console.log('[detect-object-and-draw] Compressing image for Gemini API...');
      console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
      const detectedObjects = await runObjectDetection(imageBuffer);
      console.log('[detect-object-and-draw] Gemini API call completed');
      console.log(
        `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
      );
      const padding = { left: 80, top: 20, right: 20, bottom: 40 };
      const canvas = new Canvas(
        imageWidth + padding.left + padding.right,
        imageHeight + padding.top + padding.bottom,
      );
      const ctx = canvas.getContext('2d');
      const img = new Image();
      img.src = imageBuffer;
      ctx.drawImage(img, padding.left, padding.top);
      ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
      ctx.lineWidth = 1;
      for (let i = 0; i <= 1000; i += 100) {
        const x = padding.left + (i / 1000) * imageWidth;
        ctx.beginPath();
        ctx.moveTo(x, padding.top);
        ctx.lineTo(x, imageHeight + padding.top);
        ctx.stroke();
      }
      // Horizontal grid lines (every 100 units on 0-1000 scale)
      for (let i = 0; i <= 1000; i += 100) {
        const y = padding.top + (i / 1000) * imageHeight;
        ctx.beginPath();
        ctx.moveTo(padding.left, y);
        ctx.lineTo(imageWidth + padding.left, y);
        ctx.stroke();
      }
      const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
      detectedObjects.forEach((obj, index) => {
        const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
        const x = xmin * imageWidth + padding.left;
        const y = ymin * imageHeight + padding.top;
        const width = (xmax - xmin) * imageWidth;
        const height = (ymax - ymin) * imageHeight;
        ctx.strokeStyle = colors[index % colors.length];
        ctx.lineWidth = 5;
        ctx.strokeRect(x, y, width, height);
        ctx.fillStyle = colors[index % colors.length];
        ctx.font = '20px Arial';
        ctx.fillText(obj.label, x, y - 5);
      });
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.font = '26px Arial';
      ctx.beginPath();
      ctx.moveTo(padding.left, padding.top);
      ctx.lineTo(padding.left, imageHeight + padding.top);
      ctx.stroke();
      ctx.textAlign = 'right';
      ctx.textBaseline = 'middle';
      for (let i = 0; i <= 1000; i += 100) {
        const y = padding.top + (i / 1000) * imageHeight;
        ctx.fillStyle = '#000000';
        ctx.fillText(i.toString(), padding.left - 5, y);
        ctx.beginPath();
        ctx.moveTo(padding.left - 5, y);
        ctx.lineTo(padding.left, y);
        ctx.stroke();
      }
      ctx.beginPath();
      ctx.moveTo(padding.left, imageHeight + padding.top);
      ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
      ctx.stroke();
      ctx.textAlign = 'center';
      ctx.textBaseline = 'top';
      for (let i = 0; i <= 1000; i += 100) {
        const x = padding.left + (i / 1000) * imageWidth;
        ctx.fillStyle = '#000000';
        ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
        ctx.beginPath();
        ctx.moveTo(x, imageHeight + padding.top);
        ctx.lineTo(x, imageHeight + padding.top + 5);
        ctx.stroke();
      }
      const now = new Date();
      const timestamp = now
        .toISOString()
        .replace(/[-:]/g, '')
        .replace(/\..+/, '')
        .replace('T', '_');
      const outputFilename = `detected_objects_${timestamp}.png`;
      const outputPath = join(process.cwd(), outputFilename);
      console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
      const pngBuffer = await canvas.toBuffer('png');
      console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
      await writeFile(outputPath, pngBuffer);
      console.log('[detect-object-and-draw] Image saved successfully!');
      return c.json<TDetectObjectsResponse>(detectedObjects);
    } catch (error) {
      console.error('Object detection and drawing failed:', error);
      if (error instanceof AppError) {
        throw error;
      }
      throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
        message: 'Failed to detect objects and draw',
        userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
      });
    }
  });
--- a/apps/remix/server/api/ai.types.ts
+++ b/apps/remix/server/api/ai.types.ts
@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer<typeof ZDetectObjectsRequestSchema>;
 /** Static type inferred from `ZDetectObjectsResponseSchema`. */
 export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;
 /**
 * Request schema for the detect-objects-and-draw operation.
 * The payload must carry an `image` that is a `Blob` instance; anything else
 * fails validation with the message "Image file is required".
 */
 export const ZDetectObjectsAndDrawRequestSchema = z.object({
-  imagePath: z.string().min(1, 'Image path is required'),
+  image: z.instanceof(Blob, { message: 'Image file is required' }),
 });
 /**
 * Response schema for the detect-objects-and-draw operation: the path of the
 * generated annotated image plus the list of detected objects.
 *
 * NOTE(review): a handler that logs under `[detect-object-and-draw]` returns the
 * bare detected-objects array typed as `TDetectObjectsResponse` — confirm whether
 * this schema is still the shape that is actually sent.
 */
 export const ZDetectObjectsAndDrawResponseSchema = z.object({
  outputPath: z.string().describe('Path to the generated image with bounding boxes'),
  detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'),
 });
 /** Static type inferred from `ZDetectObjectsAndDrawRequestSchema`. */
 export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
 /** Static type inferred from `ZDetectObjectsAndDrawResponseSchema`. */
 export type TDetectObjectsAndDrawResponse = z.infer<typeof ZDetectObjectsAndDrawResponseSchema>;
--- a/packages/lib/client-only/utils/page-canvas-registry.ts
+++ b/packages/lib/client-only/utils/page-canvas-registry.ts
@ -0,0 +1,110 @@
 import type Konva from 'konva';
 /**
 * Represents canvas references for a specific PDF page.
 *
 * One of these is stored per page in the module-level registry, keyed by
 * `pageNumber`.
 */
 export interface PageCanvasRefs {
  /** The page number (1-indexed); also used as the registry key. */
  pageNumber: number;
  /** The canvas element containing the rendered PDF (drawn first when compositing). */
  pdfCanvas: HTMLCanvasElement;
  /** The Konva stage containing field overlays (drawn on top when compositing). */
  konvaStage: Konva.Stage;
 }
 /**
 * Module-scoped lookup table from page number to that page's canvas references.
 * Keeping it at module level lets any component reach a page's canvases
 * without threading them through props.
 */
 const pageCanvasRegistry: Map<number, PageCanvasRefs> = new Map();
 /**
 * Store a page's canvas references in the registry under its page number.
 * Registering the same page number again replaces the previous entry.
 *
 * @param refs - The canvas references to register
 */
 export const registerPageCanvas = (refs: PageCanvasRefs): void => {
  const { pageNumber } = refs;
  pageCanvasRegistry.set(pageNumber, refs);
 };
 /**
 * Drop a page's canvas references from the registry.
 * Intended for renderer teardown so stale canvases are not retained.
 * Unknown page numbers are ignored.
 *
 * @param pageNumber - The page number to unregister
 */
 export const unregisterPageCanvas = (pageNumber: number): void => {
  if (pageCanvasRegistry.has(pageNumber)) {
    pageCanvasRegistry.delete(pageNumber);
  }
 };
 /**
 * Look up the canvas references registered for a page.
 *
 * @param pageNumber - The page number to retrieve
 * @returns The registered references, or undefined when the page has none
 */
 export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined =>
  pageCanvasRegistry.get(pageNumber);
 /**
 * List every page number that currently has canvas references registered.
 *
 * @returns A new array of registered page numbers in ascending numeric order
 */
 export const getRegisteredPageNumbers = (): number[] => {
  const pageNumbers = [...pageCanvasRegistry.keys()];
  pageNumbers.sort((left, right) => left - right);
  return pageNumbers;
 };
 /**
 * Composite a PDF page with its field overlays into a single PNG Blob.
 * This creates a temporary canvas, draws the PDF canvas first (background),
 * then draws the Konva canvas on top (field overlays).
 *
 * Never rejects: every failure, including a failed PNG encode, is logged and
 * reported as `null`.
 *
 * @param pageNumber - The page number to composite (1-indexed)
 * @returns Promise that resolves to a PNG Blob, or null if page not found or compositing fails
 */
 export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
  const refs = getPageCanvasRefs(pageNumber);
  if (!refs) {
    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
    return null;
  }
  try {
    // Create temporary canvas with same dimensions as PDF canvas
    const tempCanvas = document.createElement('canvas');
    tempCanvas.width = refs.pdfCanvas.width;
    tempCanvas.height = refs.pdfCanvas.height;
    const ctx = tempCanvas.getContext('2d');
    if (!ctx) {
      console.error('Failed to get 2D context for temporary canvas');
      return null;
    }
    // Draw PDF canvas first (background layer)
    ctx.drawImage(refs.pdfCanvas, 0, 0);
    // Get Konva canvas and draw on top (field overlays)
    // Note: Konva's toCanvas() returns a new canvas with all layers rendered
    // NOTE(review): the overlay is drawn at its natural size, which assumes the
    // Konva export has the same pixel dimensions as the PDF canvas — confirm on
    // high-DPI displays.
    const konvaCanvas = refs.konvaStage.toCanvas();
    ctx.drawImage(konvaCanvas, 0, 0);
    // Convert to PNG Blob.
    // The promise must be awaited inside the try block: returning it un-awaited
    // would let a rejection bypass the catch below and reach the caller instead
    // of becoming the documented `null` result.
    return await new Promise<Blob>((resolve, reject) => {
      tempCanvas.toBlob((blob) => {
        if (blob) {
          resolve(blob);
        } else {
          // toBlob hands back null when the image could not be created
          reject(new Error('Failed to convert canvas to blob'));
        }
      }, 'image/png');
    });
  } catch (error) {
    console.error(`Error compositing page ${pageNumber}:`, error);
    return null;
  }
 };
--- a/packages/lib/universal/field-renderer/field-constants.ts
+++ b/packages/lib/universal/field-renderer/field-constants.ts
@ -0,0 +1,19 @@
 /**
 * Shared constants for field dimension enforcement.
 *
 * These constants ensure consistency between:
 * 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
 * 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
 */
 /**
 * Minimum field height in pixels.
 * Fields smaller than this will be expanded to meet minimum usability requirements.
 * Counterpart of `MIN_FIELD_WIDTH_PX` for the vertical dimension.
 */
 export const MIN_FIELD_HEIGHT_PX = 30;
 /**
 * Minimum field width in pixels.
 * Fields smaller than this will be expanded to meet minimum usability requirements.
 * Counterpart of `MIN_FIELD_HEIGHT_PX` for the horizontal dimension.
 */
 export const MIN_FIELD_WIDTH_PX = 36;