feat: add AI field auto-placement with canvas registry

2025-11-14 00:32:43 +10:00 · 2025-10-29 23:03:58 +00:00
parent 94098bd762
commit 29be66a844
7 changed files with 623 additions and 279 deletions
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi
 import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
+import {
+  registerPageCanvas,
+  unregisterPageCanvas,
+} from '@documenso/lib/client-only/utils/page-canvas-registry';
 import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import {
  MIN_FIELD_HEIGHT_PX,
@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
    [editorFields.localFields, pageContext.pageNumber],
  );

+  /**
+   * Cleanup: Unregister canvas when component unmounts
+   */
+  useEffect(() => {
+    return () => {
+      unregisterPageCanvas(pageContext.pageNumber);
+    };
+  }, [pageContext.pageNumber]);
+
  const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
    const { current: container } = canvasElement;

@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
    currentStage.on('transformend', () => setIsFieldChanging(false));

    currentPageLayer.batchDraw();
+
+    // Register this page's canvas references now that everything is initialized
+    if (canvasElement.current && currentStage) {
+      registerPageCanvas({
+        pageNumber: pageContext.pageNumber,
+        pdfCanvas: canvasElement.current,
+        konvaStage: currentStage,
+      });
+    }
  };

  /**
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
@ -1,4 +1,4 @@
-import { lazy, useEffect, useMemo } from 'react';
+import { lazy, useEffect, useMemo, useState } from 'react';

 import type { MessageDescriptor } from '@lingui/core';
 import { msg } from '@lingui/core/macro';
@ -11,6 +11,10 @@ import { match } from 'ts-pattern';

 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
+import {
+  compositePageToBlob,
+  getPageCanvasRefs,
+} from '@documenso/lib/client-only/utils/page-canvas-registry';
 import type {
  TCheckboxFieldMeta,
  TDateFieldMeta,
@ -24,12 +28,15 @@ import type {
  TSignatureFieldMeta,
  TTextFieldMeta,
 } from '@documenso/lib/types/field-meta';
+import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
 import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
 import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
 import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
+import { Button } from '@documenso/ui/primitives/button';
 import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
 import { Separator } from '@documenso/ui/primitives/separator';
+import { useToast } from '@documenso/ui/primitives/use-toast';

 import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
 import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy(
  async () => import('./envelope-editor-fields-page-renderer'),
 );

+/**
+ * Enforces minimum field dimensions, expanding undersized fields around their center.
+ *
+ * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures
+ * fields meet minimum usability requirements by expanding them to at least 30px height and
+ * 36px width, while keeping them centered on their original position. An expanded field
+ * that would cross a page edge is shifted back so that it stays inside the 0-100% range.
+ *
+ * Each axis is handled independently, and an axis that already meets its minimum is
+ * returned untouched. An axis is also left untouched when its page dimension is not a
+ * positive finite number, because the percentage/pixel conversion would otherwise
+ * produce NaN or Infinity. When the minimum is larger than the page itself, the field is
+ * capped to the full page (size 100, position 0).
+ *
+ * @param params - Field dimensions and page size
+ * @param params.positionX - Field X position as percentage (0-100)
+ * @param params.positionY - Field Y position as percentage (0-100)
+ * @param params.width - Field width as percentage (0-100)
+ * @param params.height - Field height as percentage (0-100)
+ * @param params.pageWidth - Page width in pixels
+ * @param params.pageHeight - Page height in pixels
+ * @returns Adjusted field dimensions with minimums enforced and centered
+ *
+ * @example
+ * // AI detected a thin line: 0.3% height
+ * const adjusted = enforceMinimumFieldDimensions({
+ *   positionX: 20, positionY: 50, width: 30, height: 0.3,
+ *   pageWidth: 800, pageHeight: 1100
+ * });
+ * // Result: height expanded to ~2.7% (30px), centered on original position
+ */
+const enforceMinimumFieldDimensions = (params: {
+  positionX: number;
+  positionY: number;
+  width: number;
+  height: number;
+  pageWidth: number;
+  pageHeight: number;
+}): {
+  positionX: number;
+  positionY: number;
+  width: number;
+  height: number;
+} => {
+  const MIN_HEIGHT_PX = 30;
+  const MIN_WIDTH_PX = 36;
+
+  // Applies the minimum to a single axis; both axes follow exactly the same rules.
+  const expandAxis = (
+    position: number,
+    size: number,
+    pageSizePx: number,
+    minSizePx: number,
+  ): { position: number; size: number } => {
+    // Without a usable page size the percentage/pixel conversion is meaningless.
+    if (!Number.isFinite(pageSizePx) || pageSizePx <= 0) {
+      return { position, size };
+    }
+
+    // Convert percentage to pixels to check against the minimum
+    const sizePx = (size / 100) * pageSizePx;
+
+    // Written as a negated "<" so that a NaN size falls through unchanged.
+    if (!(sizePx < minSizePx)) {
+      return { position, size };
+    }
+
+    const centerPx = (position / 100) * pageSizePx + sizePx / 2;
+
+    // Cap at 100% so a minimum larger than the page cannot overflow it.
+    const adjustedSize = Math.min((minSizePx / pageSizePx) * 100, 100);
+    let adjustedPosition = ((centerPx - minSizePx / 2) / pageSizePx) * 100;
+
+    // Shift the expanded field back inside the page if centering pushed it past an edge.
+    if (adjustedPosition < 0) {
+      adjustedPosition = 0;
+    } else if (adjustedPosition + adjustedSize > 100) {
+      adjustedPosition = 100 - adjustedSize;
+    }
+
+    return { position: adjustedPosition, size: adjustedSize };
+  };
+
+  const horizontal = expandAxis(params.positionX, params.width, params.pageWidth, MIN_WIDTH_PX);
+  const vertical = expandAxis(params.positionY, params.height, params.pageHeight, MIN_HEIGHT_PX);
+
+  return {
+    positionX: horizontal.position,
+    positionY: vertical.position,
+    width: horizontal.size,
+    height: vertical.size,
+  };
+};
+
 const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
  [FieldType.SIGNATURE]: msg`Signature Settings`,
  [FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => {
  const { currentEnvelopeItem } = useCurrentEnvelopeRender();

  const { t } = useLingui();
+  const { toast } = useToast();
+
+  const [isAutoAddingFields, setIsAutoAddingFields] = useState(false);

  const selectedField = useMemo(
    () => structuredClone(editorFields.selectedField),
@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => {
              selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
              selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
            />
+
+            <Button
+              className="mt-4 w-full"
+              variant="outline"
+              disabled={isAutoAddingFields}
+              onClick={async () => {
+                // Auto-placement currently only inspects the first page of the document.
+                const pageNumber = 1;
+                const recipient = editorFields.selectedRecipient;
+
+                // Check the cheap preconditions up front so the page is never captured and
+                // sent to the AI endpoint when the detected fields could not be added anyway.
+                if (!recipient || !currentEnvelopeItem) {
+                  toast({
+                    title: t`Warning`,
+                    description: t`Please select a recipient before adding fields.`,
+                    variant: 'destructive',
+                  });
+                  return;
+                }
+
+                const envelopeItemId = currentEnvelopeItem.id;
+
+                setIsAutoAddingFields(true);
+
+                try {
+                  const blob = await compositePageToBlob(pageNumber);
+
+                  if (!blob) {
+                    toast({
+                      title: t`Error`,
+                      description: t`Failed to capture page. Please ensure the document is fully loaded.`,
+                      variant: 'destructive',
+                    });
+                    return;
+                  }
+
+                  console.log(`Successfully captured page ${pageNumber} as PNG Blob:`, {
+                    size: `${(blob.size / 1024).toFixed(2)} KB`,
+                    type: blob.type,
+                  });
+
+                  console.log('[Auto Add Fields] Sending image to AI endpoint...');
+                  const formData = new FormData();
+                  formData.append('image', blob, `page-${pageNumber}.png`);
+
+                  const response = await fetch('/api/ai/detect-object-and-draw', {
+                    method: 'POST',
+                    body: formData,
+                    credentials: 'include',
+                  });
+
+                  if (!response.ok) {
+                    throw new Error(`AI detection failed: ${response.statusText}`);
+                  }
+
+                  // The payload crosses a network boundary, so validate its shape before use.
+                  const detectedFields: unknown = await response.json();
+
+                  if (!Array.isArray(detectedFields)) {
+                    throw new Error('AI detection returned an unexpected response');
+                  }
+
+                  console.log(
+                    `[Auto Add Fields] Detected ${detectedFields.length} fields:`,
+                    detectedFields,
+                  );
+
+                  const pageCanvasRefs = getPageCanvasRefs(pageNumber);
+                  if (!pageCanvasRefs) {
+                    console.warn(
+                      '[Auto Add Fields] Could not get page dimensions for minimum field enforcement',
+                    );
+                  }
+
+                  const knownFieldTypes: string[] = Object.values(FieldType);
+
+                  let addedCount = 0;
+                  for (const detected of detectedFields) {
+                    const box = detected?.box_2d;
+                    const label = detected?.label;
+
+                    // Skip detections that do not carry a usable box or a known field type
+                    // instead of creating a broken field from them.
+                    if (
+                      !Array.isArray(box) ||
+                      box.length !== 4 ||
+                      !box.every((value) => typeof value === 'number' && Number.isFinite(value)) ||
+                      !knownFieldTypes.includes(label)
+                    ) {
+                      console.warn('[Auto Add Fields] Skipping malformed detection:', detected);
+                      continue;
+                    }
+
+                    // Boxes arrive on a 0-1000 grid; fields are positioned in percentages.
+                    const [ymin, xmin, ymax, xmax] = box;
+                    let positionX = (xmin / 1000) * 100;
+                    let positionY = (ymin / 1000) * 100;
+                    let width = ((xmax - xmin) / 1000) * 100;
+                    let height = ((ymax - ymin) / 1000) * 100;
+
+                    if (pageCanvasRefs) {
+                      const adjusted = enforceMinimumFieldDimensions({
+                        positionX,
+                        positionY,
+                        width,
+                        height,
+                        pageWidth: pageCanvasRefs.pdfCanvas.width,
+                        pageHeight: pageCanvasRefs.pdfCanvas.height,
+                      });
+
+                      positionX = adjusted.positionX;
+                      positionY = adjusted.positionY;
+                      width = adjusted.width;
+                      height = adjusted.height;
+                    }
+
+                    // Safe: the label was checked against the FieldType values above.
+                    const fieldType = label as FieldType;
+
+                    try {
+                      editorFields.addField({
+                        envelopeItemId,
+                        page: pageNumber,
+                        type: fieldType,
+                        positionX,
+                        positionY,
+                        width,
+                        height,
+                        recipientId: recipient.id,
+                        fieldMeta: structuredClone(FIELD_META_DEFAULT_VALUES[fieldType]),
+                      });
+                      addedCount++;
+                    } catch (error) {
+                      console.error(`Failed to add ${fieldType} field:`, error);
+                    }
+                  }
+
+                  console.log(
+                    `[Auto Add Fields] Successfully added ${addedCount} fields to the document`,
+                  );
+
+                  toast({
+                    title: t`Success`,
+                    description: t`Added ${addedCount} fields to the document`,
+                  });
+                } catch (error) {
+                  console.error('Auto add fields error:', error);
+                  // This handler covers capture, upload and detection failures alike.
+                  toast({
+                    title: t`Error`,
+                    description: t`An unexpected error occurred while auto-adding fields.`,
+                    variant: 'destructive',
+                  });
+                } finally {
+                  setIsAutoAddingFields(false);
+                }
+              }}
+            >
+              {isAutoAddingFields ? <Trans>Processing...</Trans> : <Trans>Auto add fields</Trans>}
+            </Button>
          </section>

          {/* Field details section. */}
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
@ -1,12 +1,11 @@
 import { useMemo, useState } from 'react';

-import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import type { DropResult } from '@hello-pangea/dnd';
+import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import { msg } from '@lingui/core/macro';
 import { Trans, useLingui } from '@lingui/react/macro';
 import { DocumentStatus } from '@prisma/client';
-import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react';
-import { X } from 'lucide-react';
+import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react';
 import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
 import { Link } from 'react-router';

--- a/apps/remix/server/api/ai.ts
+++ b/apps/remix/server/api/ai.ts
@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';

 import type { HonoEnv } from '../router';
 import {
-  type TDetectObjectsAndDrawResponse,
  type TDetectObjectsResponse,
  type TGenerateTextResponse,
  ZDetectObjectsAndDrawRequestSchema,
@ -41,6 +40,88 @@ async function resizeAndCompressImage(imageBuffer: Buffer): Promise<Buffer> {
  return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
 }

+/**
+ * Prompt text sent together with the page image when asking the model to detect
+ * fillable form fields.
+ *
+ * It asks for bounding boxes in [ymin, xmin, ymax, xmax] order on a 0-1000 normalized
+ * grid and lists the field type labels the model may use. `runObjectDetection` passes
+ * this string verbatim as the text part of the request, so any edit here changes what
+ * the model is asked to do.
+ */
+const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
+
+IMPORTANT RULES:
+1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
+2. Analyze nearby text labels to determine the field type
+3. Return bounding boxes for the fillable area only, NOT the label text
+4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
+
+FIELD TYPES TO DETECT:
+• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
+• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
+• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
+• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
+• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
+• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
+• RADIO - Empty radio button circles (○) in groups, typically circular selection options
+• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
+• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
+• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
+
+DETECTION GUIDELINES:
+- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
+- If you're uncertain which type fits best, default to TEXT
+- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
+- Signature fields are often longer horizontal lines or larger boxes
+- Date fields often show format hints or date separators (slashes, dashes)
+- Look for visual patterns: underscores (____), horizontal lines, box outlines
+- Return coordinates for the fillable area, not the descriptive label text
+
+COORDINATE SYSTEM:
+- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
+- Top-left corner: ymin and xmin close to 0
+- Bottom-right corner: ymax and xmax close to 1000
+- Coordinates represent positions on a 1000x1000 grid overlaid on the image
+
+FIELD SIZING STRATEGY FOR LINE-BASED FIELDS:
+When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields:
+1. Analyze the visual context around the detected line:
+   - Look at the empty space ABOVE the detected line
+   - Observe the spacing to any text labels, headers, or other form elements above
+   - Assess what would be a reasonable field height to make the field clearly visible when filled
+2. Expand UPWARD from the detected line to create a usable field:
+   - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge)
+   - Extend ymin (top) upward into the available whitespace
+   - Aim to use 60-80% of the clear whitespace above the line, while being reasonable
+   - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall)
+3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units
+4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0
+5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those
+6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400:
+   - Available whitespace: 100 units
+   - Use 60-80% of that: 60-80 units
+   - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field)
+   - This gives comfortable signing space while respecting the form layout`;
+
+/**
+ * Runs form-field detection on a raw image buffer.
+ *
+ * The image is passed through `resizeAndCompressImage`, base64-encoded and embedded as a
+ * JPEG data URL, then sent with `detectObjectsPrompt` to the `gemini-2.5-pro` model. The
+ * model output is generated against `ZDetectObjectsResponseSchema`.
+ *
+ * @param imageBuffer - Raw bytes of the page image to analyse
+ * @returns The structured detection result produced by the model
+ */
+const runObjectDetection = async (imageBuffer: Buffer): Promise<TDetectObjectsResponse> => {
+  const jpegBuffer = await resizeAndCompressImage(imageBuffer);
+  const imageDataUrl = `data:image/jpeg;base64,${jpegBuffer.toString('base64')}`;
+
+  const { object: detectedObjects } = await generateObject({
+    model: google('gemini-2.5-pro'),
+    schema: ZDetectObjectsResponseSchema,
+    messages: [
+      {
+        role: 'user',
+        content: [
+          { type: 'image', image: imageDataUrl },
+          { type: 'text', text: detectObjectsPrompt },
+        ],
+      },
+    ],
+  });
+
+  return detectedObjects;
+};
+
 export const aiRoute = new Hono<HonoEnv>()
  .use(
    '*',
@ -85,63 +166,9 @@ export const aiRoute = new Hono<HonoEnv>()
      const { imagePath } = c.req.valid('json');

      const imageBuffer = await readFile(imagePath);
-      const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
-      const base64Image = compressedImageBuffer.toString('base64');
+      const detectedObjects = await runObjectDetection(imageBuffer);

-      const result = await generateObject({
-        model: google('gemini-2.5-pro'),
-        schema: ZDetectObjectsResponseSchema,
-        messages: [
-          {
-            role: 'user',
-            content: [
-              {
-                type: 'image',
-                image: `data:image/jpeg;base64,${base64Image}`,
-              },
-              {
-                type: 'text',
-                text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
-
-IMPORTANT RULES:
-1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
-2. Analyze nearby text labels to determine the field type
-3. Return bounding boxes for the fillable area only, NOT the label text
-4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
-
-FIELD TYPES TO DETECT:
-• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
-• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
-• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
-• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
-• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
-• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
-• RADIO - Empty radio button circles (○) in groups, typically circular selection options
-• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
-• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
-• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
-
-DETECTION GUIDELINES:
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
- If you're uncertain which type fits best, default to TEXT
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
- Signature fields are often longer horizontal lines or larger boxes
- Date fields often show format hints or date separators (slashes, dashes)
- Look for visual patterns: underscores (____), horizontal lines, box outlines
- Return coordinates for the fillable area, not the descriptive label text
-
-COORDINATE SYSTEM:
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
- Top-left corner: ymin and xmin close to 0
- Bottom-right corner: ymax and xmax close to 1000
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
-              },
-            ],
-          },
-        ],
-      });
-
-      return c.json<TDetectObjectsResponse>(result.object);
+      return c.json<TDetectObjectsResponse>(detectedObjects);
    } catch (error) {
      console.error('Object detection failed:', error);

@ -156,18 +183,25 @@ COORDINATE SYSTEM:
    }
  })

-  .post(
-    '/detect-object-and-draw',
-    sValidator('json', ZDetectObjectsAndDrawRequestSchema),
-    async (c) => {
+  .post('/detect-object-and-draw', async (c) => {
    try {
      await getSession(c.req.raw);

-        const { imagePath } = c.req.valid('json');
+      const parsedBody = await c.req.parseBody();
+      const rawImage = parsedBody.image;
+      const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage;
+      const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate });

-        console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`);
+      if (!parsed.success) {
+        throw new AppError(AppErrorCode.INVALID_REQUEST, {
+          message: 'Image file is required',
+          userMessage: 'Please upload a valid image file.',
+        });
+      }

-        const imageBuffer = await readFile(imagePath);
+      const imageBlob = parsed.data.image;
+      const arrayBuffer = await imageBlob.arrayBuffer();
+      const imageBuffer = Buffer.from(arrayBuffer);
      const metadata = await sharp(imageBuffer).metadata();
      const imageWidth = metadata.width;
      const imageHeight = metadata.height;
@ -184,66 +218,10 @@ COORDINATE SYSTEM:
      }

      console.log('[detect-object-and-draw] Compressing image for Gemini API...');
-        const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
-        const base64Image = compressedImageBuffer.toString('base64');
-
      console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
-        const result = await generateObject({
-          model: google('gemini-2.5-pro'),
-          schema: ZDetectObjectsResponseSchema,
-          messages: [
-            {
-              role: 'user',
-              content: [
-                {
-                  type: 'image',
-                  image: `data:image/jpeg;base64,${base64Image}`,
-                },
-                {
-                  type: 'text',
-                  text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
-
-IMPORTANT RULES:
-1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
-2. Analyze nearby text labels to determine the field type
-3. Return bounding boxes for the fillable area only, NOT the label text
-4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
-
-FIELD TYPES TO DETECT:
-• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
-• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
-• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
-• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
-• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
-• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
-• RADIO - Empty radio button circles (○) in groups, typically circular selection options
-• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
-• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
-• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
-
-DETECTION GUIDELINES:
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
- If you're uncertain which type fits best, default to TEXT
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
- Signature fields are often longer horizontal lines or larger boxes
- Date fields often show format hints or date separators (slashes, dashes)
- Look for visual patterns: underscores (____), horizontal lines, box outlines
- Return coordinates for the fillable area, not the descriptive label text
-
-COORDINATE SYSTEM:
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
- Top-left corner: ymin and xmin close to 0
- Bottom-right corner: ymax and xmax close to 1000
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
-                },
-              ],
-            },
-          ],
-        });
+      const detectedObjects = await runObjectDetection(imageBuffer);
      console.log('[detect-object-and-draw] Gemini API call completed');

-        const detectedObjects = result.object;
-
      console.log(
        `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
      );
@ -353,10 +331,7 @@ COORDINATE SYSTEM:
      await writeFile(outputPath, pngBuffer);

      console.log('[detect-object-and-draw] Image saved successfully!');
-        return c.json<TDetectObjectsAndDrawResponse>({
-          outputPath,
-          detectedObjects,
-        });
+      return c.json<TDetectObjectsResponse>(detectedObjects);
    } catch (error) {
      console.error('Object detection and drawing failed:', error);

@ -369,5 +344,4 @@ COORDINATE SYSTEM:
        userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
      });
    }
-    },
-  );
+  });
--- a/apps/remix/server/api/ai.types.ts
+++ b/apps/remix/server/api/ai.types.ts
@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer<typeof ZDetectObjectsRequestSchema>;
 export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;

 export const ZDetectObjectsAndDrawRequestSchema = z.object({
-  imagePath: z.string().min(1, 'Image path is required'),
-});
-
-export const ZDetectObjectsAndDrawResponseSchema = z.object({
-  outputPath: z.string().describe('Path to the generated image with bounding boxes'),
-  detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'),
+  image: z.instanceof(Blob, { message: 'Image file is required' }),
 });

 export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
-export type TDetectObjectsAndDrawResponse = z.infer<typeof ZDetectObjectsAndDrawResponseSchema>;
--- a/packages/lib/client-only/utils/page-canvas-registry.ts
+++ b/packages/lib/client-only/utils/page-canvas-registry.ts
@ -0,0 +1,110 @@
+import type Konva from 'konva';
+
+/**
+ * Represents canvas references for a specific PDF page.
+ *
+ * One entry per page is kept in the module-level registry, keyed by `pageNumber`.
+ */
+export interface PageCanvasRefs {
+  /** The page number (1-indexed); also used as the registry key */
+  pageNumber: number;
+  /** The canvas element containing the rendered PDF; its width/height size the composited image */
+  pdfCanvas: HTMLCanvasElement;
+  /** The Konva stage containing field overlays; drawn on top of `pdfCanvas` when compositing */
+  konvaStage: Konva.Stage;
+}
+
+/**
+ * Module-level registry to store canvas references by page number.
+ * This allows any component to access page canvases without prop drilling.
+ *
+ * Entries are written by `registerPageCanvas` and removed by `unregisterPageCanvas`.
+ * Registering a page number that is already present replaces the earlier entry.
+ *
+ * NOTE(review): the key is the page number alone, so two documents rendered at the same
+ * time would presumably overwrite each other's entries — confirm only one is ever mounted.
+ */
+const pageCanvasRegistry = new Map<number, PageCanvasRefs>();
+
+/**
+ * Store the canvas references for a page in the registry.
+ * References previously stored under the same page number are replaced.
+ *
+ * @param refs - Canvas references to store; `refs.pageNumber` is the lookup key
+ */
+export const registerPageCanvas = (refs: PageCanvasRefs): void => {
+  const { pageNumber } = refs;
+
+  pageCanvasRegistry.set(pageNumber, refs);
+};
+
+/**
+ * Remove a page's canvas references from the registry.
+ * Intended for renderer teardown so stale canvases and stages are not kept alive.
+ *
+ * @param pageNumber - The page number whose entry should be removed
+ */
+export const unregisterPageCanvas = (pageNumber: number): void => {
+  // Removing a page that was never registered is a harmless no-op.
+  pageCanvasRegistry.delete(pageNumber);
+};
+
+/**
+ * Look up the canvas references registered for a page.
+ *
+ * @param pageNumber - The page number to look up
+ * @returns The registered references, or undefined when the page has no entry
+ */
+export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined =>
+  pageCanvasRegistry.get(pageNumber);
+
+/**
+ * List the page numbers that currently have registered canvases.
+ *
+ * @returns A new array of registered page numbers in ascending order
+ */
+export const getRegisteredPageNumbers = (): number[] => {
+  const pageNumbers = Array.from(pageCanvasRegistry.keys());
+
+  // Numeric comparator: the default sort would order the numbers as strings.
+  pageNumbers.sort((left, right) => left - right);
+
+  return pageNumbers;
+};
+
+/**
+ * Composite a PDF page with its field overlays into a single PNG Blob.
+ * This creates a temporary canvas, draws the PDF canvas first (background),
+ * then draws the Konva canvas on top (field overlays).
+ *
+ * Every failure is reported by resolving to null (after logging); the returned
+ * promise never rejects. This covers an unregistered page, a missing 2D context,
+ * a drawing error and a failed PNG conversion.
+ *
+ * @param pageNumber - The page number to composite (1-indexed)
+ * @returns Promise that resolves to a PNG Blob, or null if page not found or compositing fails
+ */
+export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
+  const refs = getPageCanvasRefs(pageNumber);
+
+  if (!refs) {
+    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
+    return null;
+  }
+
+  try {
+    // Create temporary canvas with same dimensions as PDF canvas
+    const tempCanvas = document.createElement('canvas');
+    tempCanvas.width = refs.pdfCanvas.width;
+    tempCanvas.height = refs.pdfCanvas.height;
+
+    const ctx = tempCanvas.getContext('2d');
+    if (!ctx) {
+      console.error('Failed to get 2D context for temporary canvas');
+      return null;
+    }
+
+    // Draw PDF canvas first (background layer)
+    ctx.drawImage(refs.pdfCanvas, 0, 0);
+
+    // Get Konva canvas and draw on top (field overlays)
+    // Note: Konva's toCanvas() returns a new canvas with all layers rendered
+    // NOTE(review): the overlay is drawn unscaled at (0, 0), which assumes the Konva stage
+    // and the PDF canvas share the same pixel dimensions — confirm.
+    const konvaCanvas = refs.konvaStage.toCanvas();
+    ctx.drawImage(konvaCanvas, 0, 0);
+
+    // Convert to PNG Blob.
+    // The promise is awaited inside the try block on purpose: returning it un-awaited
+    // would let a failed conversion bypass the catch below and reject the caller
+    // instead of resolving to null as documented.
+    return await new Promise<Blob>((resolve, reject) => {
+      tempCanvas.toBlob((blob) => {
+        if (blob) {
+          resolve(blob);
+        } else {
+          reject(new Error('Failed to convert canvas to blob'));
+        }
+      }, 'image/png');
+    });
+  } catch (error) {
+    console.error(`Error compositing page ${pageNumber}:`, error);
+    return null;
+  }
+};
--- a/packages/lib/universal/field-renderer/field-constants.ts
+++ b/packages/lib/universal/field-renderer/field-constants.ts
@ -0,0 +1,19 @@
+/**
+ * Shared constants for field dimension enforcement.
+ *
+ * These constants ensure consistency between:
+ * 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
+ * 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
+ *
+ * NOTE(review): in the visible parts of this change, envelope-editor-fields-page.tsx
+ * declares its own local 30 / 36 values and the AI prompt spells the numbers out as
+ * literals instead of importing these constants — confirm the consumers are wired up,
+ * or keep the values in sync by hand.
+ */
+
+/**
+ * Minimum field height in pixels.
+ * Fields smaller than this will be expanded to meet minimum usability requirements.
+ */
+export const MIN_FIELD_HEIGHT_PX = 30;
+
+/**
+ * Minimum field width in pixels.
+ * Fields smaller than this will be expanded to meet minimum usability requirements.
+ */
+export const MIN_FIELD_WIDTH_PX = 36;