From 29be66a844a702324af17eb0c1fb213b58199998 Mon Sep 17 00:00:00 2001
From: Ephraim Atta-Duncan <ephraimduncan68@gmail.com>
Date: Wed, 29 Oct 2025 23:03:58 +0000
Subject: [PATCH] feat: add AI field auto-placement with canvas registry

---
 .../envelope-editor-fields-page-renderer.tsx  |  22 +
 .../envelope-editor-fields-page.tsx           | 228 +++++++-
 .../envelope-editor-upload-page.tsx           |   5 +-
 apps/remix/server/api/ai.ts                   | 510 +++++++++---------
 apps/remix/server/api/ai.types.ts             |   8 +-
 .../client-only/utils/page-canvas-registry.ts | 110 ++++
 .../field-renderer/field-constants.ts         |  19 +
 7 files changed, 623 insertions(+), 279 deletions(-)
 create mode 100644 packages/lib/client-only/utils/page-canvas-registry.ts
 create mode 100644 packages/lib/universal/field-renderer/field-constants.ts
diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
index c75fb52a5..f0af450d8 100644
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page-renderer.tsx
@@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi
 import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
+import {
+  registerPageCanvas,
+  unregisterPageCanvas,
+} from '@documenso/lib/client-only/utils/page-canvas-registry';
 import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import {
   MIN_FIELD_HEIGHT_PX,
@@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
     [editorFields.localFields, pageContext.pageNumber],
   );
 
+  /**
+   * Cleanup: unregister this page's canvas on unmount, or when the page number changes.
+   */
+  useEffect(() => {
+    return () => {
+      unregisterPageCanvas(pageContext.pageNumber);
+    };
+  }, [pageContext.pageNumber]);
+
   const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
     const { current: container } = canvasElement;
 
@@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
     currentStage.on('transformend', () => setIsFieldChanging(false));
 
     currentPageLayer.batchDraw();
+
+    // Register this page's canvas references now that everything is initialized
+    if (canvasElement.current && currentStage) {
+      registerPageCanvas({
+        pageNumber: pageContext.pageNumber,
+        pdfCanvas: canvasElement.current,
+        konvaStage: currentStage,
+      });
+    }
   };
 
   /**
diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
index 4bd0915da..c04c9b85d 100644
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx
@@ -1,4 +1,4 @@
-import { lazy, useEffect, useMemo } from 'react';
+import { lazy, useEffect, useMemo, useState } from 'react';
 
 import type { MessageDescriptor } from '@lingui/core';
 import { msg } from '@lingui/core/macro';
@@ -11,6 +11,10 @@ import { match } from 'ts-pattern';
 
 import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
 import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
+import {
+  compositePageToBlob,
+  getPageCanvasRefs,
+} from '@documenso/lib/client-only/utils/page-canvas-registry';
 import type {
   TCheckboxFieldMeta,
   TDateFieldMeta,
@@ -24,12 +28,15 @@ import type {
   TSignatureFieldMeta,
   TTextFieldMeta,
 } from '@documenso/lib/types/field-meta';
+import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
 import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
 import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
 import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
 import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
+import { Button } from '@documenso/ui/primitives/button';
 import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
 import { Separator } from '@documenso/ui/primitives/separator';
+import { useToast } from '@documenso/ui/primitives/use-toast';
 
 import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
 import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
@@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy(
   async () => import('./envelope-editor-fields-page-renderer'),
 );
 
+/**
+ * Enforces minimum field dimensions, expanding undersized fields around their center.
+ *
+ * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures
+ * fields meet minimum usability requirements by expanding them to at least 30px height and
+ * 36px width, while keeping them centered on their original position. An expanded field that
+ * would overflow the page is shifted back inside it, and a minimum that is larger than the
+ * page itself is capped at 100% of the page.
+ *
+ * Axes that already meet the minimum are returned untouched (they are not clamped). If either
+ * page dimension is not a positive finite number the input geometry is returned unchanged,
+ * because the percentage/pixel conversion would otherwise produce NaN or Infinity.
+ *
+ * @param params - Field dimensions and page size
+ * @param params.positionX - Field X position as percentage (0-100)
+ * @param params.positionY - Field Y position as percentage (0-100)
+ * @param params.width - Field width as percentage (0-100)
+ * @param params.height - Field height as percentage (0-100)
+ * @param params.pageWidth - Page width in pixels
+ * @param params.pageHeight - Page height in pixels
+ * @returns Adjusted field dimensions (percentages) with minimums enforced and centered
+ *
+ * @example
+ * // AI detected a thin line: 0.3% height
+ * const adjusted = enforceMinimumFieldDimensions({
+ *   positionX: 20, positionY: 50, width: 30, height: 0.3,
+ *   pageWidth: 800, pageHeight: 1100
+ * });
+ * // Result: height expanded to ~2.7% (30px), centered on original position
+ */
+const enforceMinimumFieldDimensions = (params: {
+  positionX: number;
+  positionY: number;
+  width: number;
+  height: number;
+  pageWidth: number;
+  pageHeight: number;
+}): {
+  positionX: number;
+  positionY: number;
+  width: number;
+  height: number;
+} => {
+  const MIN_HEIGHT_PX = 30;
+  const MIN_WIDTH_PX = 36;
+
+  const { positionX, positionY, width, height, pageWidth, pageHeight } = params;
+
+  const isUsablePageSize = (value: number) => Number.isFinite(value) && value > 0;
+
+  // Without a real page size the px <-> % conversion divides by zero; leave the field as-is.
+  if (!isUsablePageSize(pageWidth) || !isUsablePageSize(pageHeight)) {
+    return { positionX, positionY, width, height };
+  }
+
+  // Expands one axis (position/size in % of the page) to `minPx`, keeping the center fixed.
+  const expandAxis = (position: number, size: number, pagePx: number, minPx: number) => {
+    // Convert percentage to pixels to check against the minimum.
+    const sizePx = (size / 100) * pagePx;
+
+    // Written as a negated `<` so a NaN size falls through untouched.
+    if (!(sizePx < minPx)) {
+      return { position, size };
+    }
+
+    const centerPx = (position / 100) * pagePx + sizePx / 2;
+
+    // Cap at 100% so a page smaller than the minimum cannot yield a negative position below.
+    const expandedSize = Math.min((minPx / pagePx) * 100, 100);
+    const centeredPosition = ((centerPx - minPx / 2) / pagePx) * 100;
+
+    // Keep the expanded field inside the page: not before 0%, not past 100%.
+    const clampedPosition = Math.min(Math.max(centeredPosition, 0), 100 - expandedSize);
+
+    return { position: clampedPosition, size: expandedSize };
+  };
+
+  const horizontal = expandAxis(positionX, width, pageWidth, MIN_WIDTH_PX);
+  const vertical = expandAxis(positionY, height, pageHeight, MIN_HEIGHT_PX);
+
+  return {
+    positionX: horizontal.position,
+    positionY: vertical.position,
+    width: horizontal.size,
+    height: vertical.size,
+  };
+};
+
 const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
   [FieldType.SIGNATURE]: msg`Signature Settings`,
   [FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
@@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => {
   const { currentEnvelopeItem } = useCurrentEnvelopeRender();
 
   const { t } = useLingui();
+  const { toast } = useToast();
+
+  const [isAutoAddingFields, setIsAutoAddingFields] = useState(false);
 
   const selectedField = useMemo(
     () => structuredClone(editorFields.selectedField),
@@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => {
               selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
               selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
             />
+
+            <Button
+              className="mt-4 w-full"
+              variant="outline"
+              disabled={isAutoAddingFields}
+              onClick={async () => {
+                const recipient = editorFields.selectedRecipient;
+
+                // Check prerequisites before capturing the page or calling the AI endpoint.
+                if (!recipient || !currentEnvelopeItem) {
+                  toast({
+                    title: t`Warning`,
+                    description: t`Please select a recipient before adding fields.`,
+                    variant: 'destructive',
+                  });
+                  return;
+                }
+
+                setIsAutoAddingFields(true);
+
+                try {
+                  const blob = await compositePageToBlob(1);
+
+                  if (!blob) {
+                    toast({
+                      title: t`Error`,
+                      description: t`Failed to capture page. Please ensure the document is fully loaded.`,
+                      variant: 'destructive',
+                    });
+                    return;
+                  }
+
+                  const formData = new FormData();
+                  formData.append('image', blob, 'page-1.png');
+
+                  const response = await fetch('/api/ai/detect-object-and-draw', {
+                    method: 'POST',
+                    body: formData,
+                    credentials: 'include',
+                  });
+
+                  if (!response.ok) {
+                    throw new Error(`AI detection failed: ${response.statusText}`);
+                  }
+
+                  // The response body is untrusted JSON, so validate its shape before use.
+                  const detectedFields: unknown = await response.json();
+
+                  if (!Array.isArray(detectedFields)) {
+                    throw new Error('AI detection returned an unexpected response');
+                  }
+
+                  const pageCanvasRefs = getPageCanvasRefs(1);
+
+                  if (!pageCanvasRefs) {
+                    console.warn(
+                      '[Auto Add Fields] Could not get page dimensions for minimum field enforcement',
+                    );
+                  }
+
+                  let addedCount = 0;
+
+                  for (const detected of detectedFields) {
+                    const box: unknown = detected?.box_2d;
+                    const label: unknown = detected?.label;
+                    const fieldType = Object.values(FieldType).find((type) => type === label);
+
+                    // Skip malformed boxes and unknown labels instead of adding a broken field.
+                    if (
+                      !fieldType ||
+                      !Array.isArray(box) ||
+                      box.length !== 4 ||
+                      !box.every((coordinate) => Number.isFinite(coordinate))
+                    ) {
+                      console.warn('[Auto Add Fields] Skipping invalid detection:', detected);
+                      continue;
+                    }
+
+                    // box_2d is [ymin, xmin, ymax, xmax] on a 0-1000 scale; convert to percent.
+                    const [ymin, xmin, ymax, xmax] = box;
+                    const detectedPlacement = {
+                      positionX: (xmin / 1000) * 100,
+                      positionY: (ymin / 1000) * 100,
+                      width: ((xmax - xmin) / 1000) * 100,
+                      height: ((ymax - ymin) / 1000) * 100,
+                    };
+
+                    const placement = pageCanvasRefs
+                      ? enforceMinimumFieldDimensions({
+                          ...detectedPlacement,
+                          pageWidth: pageCanvasRefs.pdfCanvas.width,
+                          pageHeight: pageCanvasRefs.pdfCanvas.height,
+                        })
+                      : detectedPlacement;
+
+                    try {
+                      editorFields.addField({
+                        envelopeItemId: currentEnvelopeItem.id,
+                        page: 1,
+                        type: fieldType,
+                        ...placement,
+                        recipientId: recipient.id,
+                        fieldMeta: structuredClone(FIELD_META_DEFAULT_VALUES[fieldType]),
+                      });
+                      addedCount++;
+                    } catch (error) {
+                      console.error(`Failed to add ${fieldType} field:`, error);
+                    }
+                  }
+
+                  toast({
+                    title: t`Success`,
+                    description: t`Added ${addedCount} fields to the document`,
+                  });
+                } catch (error) {
+                  console.error('Auto add fields error:', error);
+                  toast({
+                    title: t`Error`,
+                    description: t`An unexpected error occurred while adding fields.`,
+                    variant: 'destructive',
+                  });
+                } finally {
+                  setIsAutoAddingFields(false);
+                }
+              }}
+            >
+              {isAutoAddingFields ? <Trans>Processing...</Trans> : <Trans>Auto add fields</Trans>}
+            </Button>
           </section>
 
           {/* Field details section. */}
diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
index fa19bb6a1..caeea13e7 100644
--- a/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
+++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-upload-page.tsx
@@ -1,12 +1,11 @@
 import { useMemo, useState } from 'react';
 
-import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import type { DropResult } from '@hello-pangea/dnd';
+import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
 import { msg } from '@lingui/core/macro';
 import { Trans, useLingui } from '@lingui/react/macro';
 import { DocumentStatus } from '@prisma/client';
-import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react';
-import { X } from 'lucide-react';
+import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react';
 import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
 import { Link } from 'react-router';
 
diff --git a/apps/remix/server/api/ai.ts b/apps/remix/server/api/ai.ts
index 4929a71e3..a37aec185 100644
--- a/apps/remix/server/api/ai.ts
+++ b/apps/remix/server/api/ai.ts
@@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
 
 import type { HonoEnv } from '../router';
 import {
-  type TDetectObjectsAndDrawResponse,
   type TDetectObjectsResponse,
   type TGenerateTextResponse,
   ZDetectObjectsAndDrawRequestSchema,
@@ -41,6 +40,88 @@ async function resizeAndCompressImage(imageBuffer: Buffer): Promise<Buffer> {
   return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
 }
 
+const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
+
+IMPORTANT RULES:
+1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
+2. Analyze nearby text labels to determine the field type
+3. Return bounding boxes for the fillable area only, NOT the label text
+4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
+
+FIELD TYPES TO DETECT:
+• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
+• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
+• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
+• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
+• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
+• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
+• RADIO - Empty radio button circles (○) in groups, typically circular selection options
+• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
+• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
+• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
+
+DETECTION GUIDELINES:
+- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
+- If you're uncertain which type fits best, default to TEXT
+- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
+- Signature fields are often longer horizontal lines or larger boxes
+- Date fields often show format hints or date separators (slashes, dashes)
+- Look for visual patterns: underscores (____), horizontal lines, box outlines
+- Return coordinates for the fillable area, not the descriptive label text
+
+COORDINATE SYSTEM:
+- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
+- Top-left corner: ymin and xmin close to 0
+- Bottom-right corner: ymax and xmax close to 1000
+- Coordinates represent positions on a 1000x1000 grid overlaid on the image
+
+FIELD SIZING STRATEGY FOR LINE-BASED FIELDS:
+When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields:
+1. Analyze the visual context around the detected line:
+   - Look at the empty space ABOVE the detected line
+   - Observe the spacing to any text labels, headers, or other form elements above
+   - Assess what would be a reasonable field height to make the field clearly visible when filled
+2. Expand UPWARD from the detected line to create a usable field:
+   - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge)
+   - Extend ymin (top) upward into the available whitespace
+   - Aim to use 60-80% of the clear whitespace above the line, while being reasonable
+   - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall)
+3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units
+4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0
+5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those
+6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400:
+   - Available whitespace: 100 units
+   - Use 60-80% of that: 60-80 units
+   - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field)
+   - This gives comfortable signing space while respecting the form layout`;
+
+/**
+ * Runs AI form-field detection on a single page image.
+ *
+ * The image is re-encoded by `resizeAndCompressImage` and sent to Gemini together with
+ * `detectObjectsPrompt`; the object produced for `ZDetectObjectsResponseSchema` is returned.
+ */
+const runObjectDetection = async (imageBuffer: Buffer): Promise<TDetectObjectsResponse> => {
+  const jpegBuffer = await resizeAndCompressImage(imageBuffer);
+  const imageDataUrl = `data:image/jpeg;base64,${jpegBuffer.toString('base64')}`;
+
+  const { object: detectedFields } = await generateObject({
+    model: google('gemini-2.5-pro'),
+    schema: ZDetectObjectsResponseSchema,
+    messages: [
+      {
+        role: 'user',
+        content: [
+          { type: 'image', image: imageDataUrl },
+          { type: 'text', text: detectObjectsPrompt },
+        ],
+      },
+    ],
+  });
+
+  return detectedFields;
+};
+
 export const aiRoute = new Hono<HonoEnv>()
   .use(
     '*',
@@ -85,63 +166,9 @@ export const aiRoute = new Hono<HonoEnv>()
       const { imagePath } = c.req.valid('json');
 
       const imageBuffer = await readFile(imagePath);
-      const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
-      const base64Image = compressedImageBuffer.toString('base64');
+      const detectedObjects = await runObjectDetection(imageBuffer);
 
-      const result = await generateObject({
-        model: google('gemini-2.5-pro'),
-        schema: ZDetectObjectsResponseSchema,
-        messages: [
-          {
-            role: 'user',
-            content: [
-              {
-                type: 'image',
-                image: `data:image/jpeg;base64,${base64Image}`,
-              },
-              {
-                type: 'text',
-                text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
-
-IMPORTANT RULES:
-1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
-2. Analyze nearby text labels to determine the field type
-3. Return bounding boxes for the fillable area only, NOT the label text
-4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
-
-FIELD TYPES TO DETECT:
-• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
-• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
-• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
-• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
-• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
-• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
-• RADIO - Empty radio button circles (○) in groups, typically circular selection options
-• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
-• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
-• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
-
-DETECTION GUIDELINES:
-- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
-- If you're uncertain which type fits best, default to TEXT
-- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
-- Signature fields are often longer horizontal lines or larger boxes
-- Date fields often show format hints or date separators (slashes, dashes)
-- Look for visual patterns: underscores (____), horizontal lines, box outlines
-- Return coordinates for the fillable area, not the descriptive label text
-
-COORDINATE SYSTEM:
-- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
-- Top-left corner: ymin and xmin close to 0
-- Bottom-right corner: ymax and xmax close to 1000
-- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
-              },
-            ],
-          },
-        ],
-      });
-
-      return c.json<TDetectObjectsResponse>(result.object);
+      return c.json<TDetectObjectsResponse>(detectedObjects);
     } catch (error) {
       console.error('Object detection failed:', error);
 
@@ -156,218 +183,165 @@ COORDINATE SYSTEM:
     }
   })
 
-  .post(
-    '/detect-object-and-draw',
-    sValidator('json', ZDetectObjectsAndDrawRequestSchema),
-    async (c) => {
-      try {
-        await getSession(c.req.raw);
+  .post('/detect-object-and-draw', async (c) => {
+    try {
+      await getSession(c.req.raw);
 
-        const { imagePath } = c.req.valid('json');
+      const parsedBody = await c.req.parseBody();
+      const rawImage = parsedBody.image;
+      const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage;
+      const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate });
 
-        console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`);
-
-        const imageBuffer = await readFile(imagePath);
-        const metadata = await sharp(imageBuffer).metadata();
-        const imageWidth = metadata.width;
-        const imageHeight = metadata.height;
-
-        console.log(
-          `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
-        );
-
-        if (!imageWidth || !imageHeight) {
-          throw new AppError(AppErrorCode.INVALID_REQUEST, {
-            message: 'Unable to extract image dimensions',
-            userMessage: 'The image file appears to be invalid or corrupted.',
-          });
-        }
-
-        console.log('[detect-object-and-draw] Compressing image for Gemini API...');
-        const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
-        const base64Image = compressedImageBuffer.toString('base64');
-
-        console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
-        const result = await generateObject({
-          model: google('gemini-2.5-pro'),
-          schema: ZDetectObjectsResponseSchema,
-          messages: [
-            {
-              role: 'user',
-              content: [
-                {
-                  type: 'image',
-                  image: `data:image/jpeg;base64,${base64Image}`,
-                },
-                {
-                  type: 'text',
-                  text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
-
-IMPORTANT RULES:
-1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
-2. Analyze nearby text labels to determine the field type
-3. Return bounding boxes for the fillable area only, NOT the label text
-4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
-
-FIELD TYPES TO DETECT:
-• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
-• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
-• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
-• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
-• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
-• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
-• RADIO - Empty radio button circles (○) in groups, typically circular selection options
-• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
-• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
-• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
-
-DETECTION GUIDELINES:
-- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
-- If you're uncertain which type fits best, default to TEXT
-- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
-- Signature fields are often longer horizontal lines or larger boxes
-- Date fields often show format hints or date separators (slashes, dashes)
-- Look for visual patterns: underscores (____), horizontal lines, box outlines
-- Return coordinates for the fillable area, not the descriptive label text
-
-COORDINATE SYSTEM:
-- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
-- Top-left corner: ymin and xmin close to 0
-- Bottom-right corner: ymax and xmax close to 1000
-- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
-                },
-              ],
-            },
-          ],
-        });
-        console.log('[detect-object-and-draw] Gemini API call completed');
-
-        const detectedObjects = result.object;
-
-        console.log(
-          `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
-        );
-
-        const padding = { left: 80, top: 20, right: 20, bottom: 40 };
-        const canvas = new Canvas(
-          imageWidth + padding.left + padding.right,
-          imageHeight + padding.top + padding.bottom,
-        );
-        const ctx = canvas.getContext('2d');
-
-        const img = new Image();
-        img.src = imageBuffer;
-        ctx.drawImage(img, padding.left, padding.top);
-
-        ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
-        ctx.lineWidth = 1;
-
-        for (let i = 0; i <= 1000; i += 100) {
-          const x = padding.left + (i / 1000) * imageWidth;
-          ctx.beginPath();
-          ctx.moveTo(x, padding.top);
-          ctx.lineTo(x, imageHeight + padding.top);
-          ctx.stroke();
-        }
-
-        // Horizontal grid lines (every 100 units on 0-1000 scale)
-        for (let i = 0; i <= 1000; i += 100) {
-          const y = padding.top + (i / 1000) * imageHeight;
-          ctx.beginPath();
-          ctx.moveTo(padding.left, y);
-          ctx.lineTo(imageWidth + padding.left, y);
-          ctx.stroke();
-        }
-
-        const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
-
-        detectedObjects.forEach((obj, index) => {
-          const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
-
-          const x = xmin * imageWidth + padding.left;
-          const y = ymin * imageHeight + padding.top;
-          const width = (xmax - xmin) * imageWidth;
-          const height = (ymax - ymin) * imageHeight;
-
-          ctx.strokeStyle = colors[index % colors.length];
-          ctx.lineWidth = 5;
-          ctx.strokeRect(x, y, width, height);
-
-          ctx.fillStyle = colors[index % colors.length];
-          ctx.font = '20px Arial';
-          ctx.fillText(obj.label, x, y - 5);
-        });
-
-        ctx.strokeStyle = '#000000';
-        ctx.lineWidth = 1;
-        ctx.font = '26px Arial';
-
-        ctx.beginPath();
-        ctx.moveTo(padding.left, padding.top);
-        ctx.lineTo(padding.left, imageHeight + padding.top);
-        ctx.stroke();
-
-        ctx.textAlign = 'right';
-        ctx.textBaseline = 'middle';
-        for (let i = 0; i <= 1000; i += 100) {
-          const y = padding.top + (i / 1000) * imageHeight;
-          ctx.fillStyle = '#000000';
-          ctx.fillText(i.toString(), padding.left - 5, y);
-
-          ctx.beginPath();
-          ctx.moveTo(padding.left - 5, y);
-          ctx.lineTo(padding.left, y);
-          ctx.stroke();
-        }
-
-        ctx.beginPath();
-        ctx.moveTo(padding.left, imageHeight + padding.top);
-        ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
-        ctx.stroke();
-
-        ctx.textAlign = 'center';
-        ctx.textBaseline = 'top';
-        for (let i = 0; i <= 1000; i += 100) {
-          const x = padding.left + (i / 1000) * imageWidth;
-          ctx.fillStyle = '#000000';
-          ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
-
-          ctx.beginPath();
-          ctx.moveTo(x, imageHeight + padding.top);
-          ctx.lineTo(x, imageHeight + padding.top + 5);
-          ctx.stroke();
-        }
-
-        const now = new Date();
-        const timestamp = now
-          .toISOString()
-          .replace(/[-:]/g, '')
-          .replace(/\..+/, '')
-          .replace('T', '_');
-        const outputFilename = `detected_objects_${timestamp}.png`;
-        const outputPath = join(process.cwd(), outputFilename);
-
-        console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
-        const pngBuffer = await canvas.toBuffer('png');
-        console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
-        await writeFile(outputPath, pngBuffer);
-
-        console.log('[detect-object-and-draw] Image saved successfully!');
-        return c.json<TDetectObjectsAndDrawResponse>({
-          outputPath,
-          detectedObjects,
-        });
-      } catch (error) {
-        console.error('Object detection and drawing failed:', error);
-
-        if (error instanceof AppError) {
-          throw error;
-        }
-
-        throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
-          message: 'Failed to detect objects and draw',
-          userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
+      if (!parsed.success) {
+        throw new AppError(AppErrorCode.INVALID_REQUEST, {
+          message: 'Image file is required',
+          userMessage: 'Please upload a valid image file.',
         });
       }
-    },
-  );
+
+      const imageBlob = parsed.data.image;
+      const arrayBuffer = await imageBlob.arrayBuffer();
+      const imageBuffer = Buffer.from(arrayBuffer);
+      const metadata = await sharp(imageBuffer).metadata();
+      const imageWidth = metadata.width;
+      const imageHeight = metadata.height;
+
+      console.log(
+        `[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
+      );
+
+      if (!imageWidth || !imageHeight) {
+        throw new AppError(AppErrorCode.INVALID_REQUEST, {
+          message: 'Unable to extract image dimensions',
+          userMessage: 'The image file appears to be invalid or corrupted.',
+        });
+      }
+
+      console.log('[detect-object-and-draw] Compressing image for Gemini API...');
+      console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
+      const detectedObjects = await runObjectDetection(imageBuffer);
+      console.log('[detect-object-and-draw] Gemini API call completed');
+
+      console.log(
+        `[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
+      );
+
+      const padding = { left: 80, top: 20, right: 20, bottom: 40 };
+      const canvas = new Canvas(
+        imageWidth + padding.left + padding.right,
+        imageHeight + padding.top + padding.bottom,
+      );
+      const ctx = canvas.getContext('2d');
+
+      const img = new Image();
+      img.src = imageBuffer;
+      ctx.drawImage(img, padding.left, padding.top);
+
+      ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
+      ctx.lineWidth = 1;
+
+      for (let i = 0; i <= 1000; i += 100) {
+        const x = padding.left + (i / 1000) * imageWidth;
+        ctx.beginPath();
+        ctx.moveTo(x, padding.top);
+        ctx.lineTo(x, imageHeight + padding.top);
+        ctx.stroke();
+      }
+
+      // Horizontal grid lines (every 100 units on 0-1000 scale)
+      for (let i = 0; i <= 1000; i += 100) {
+        const y = padding.top + (i / 1000) * imageHeight;
+        ctx.beginPath();
+        ctx.moveTo(padding.left, y);
+        ctx.lineTo(imageWidth + padding.left, y);
+        ctx.stroke();
+      }
+
+      const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
+
+      detectedObjects.forEach((obj, index) => {
+        const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
+
+        const x = xmin * imageWidth + padding.left;
+        const y = ymin * imageHeight + padding.top;
+        const width = (xmax - xmin) * imageWidth;
+        const height = (ymax - ymin) * imageHeight;
+
+        ctx.strokeStyle = colors[index % colors.length];
+        ctx.lineWidth = 5;
+        ctx.strokeRect(x, y, width, height);
+
+        ctx.fillStyle = colors[index % colors.length];
+        ctx.font = '20px Arial';
+        ctx.fillText(obj.label, x, y - 5);
+      });
+
+      ctx.strokeStyle = '#000000';
+      ctx.lineWidth = 1;
+      ctx.font = '26px Arial';
+
+      ctx.beginPath();
+      ctx.moveTo(padding.left, padding.top);
+      ctx.lineTo(padding.left, imageHeight + padding.top);
+      ctx.stroke();
+
+      ctx.textAlign = 'right';
+      ctx.textBaseline = 'middle';
+      for (let i = 0; i <= 1000; i += 100) {
+        const y = padding.top + (i / 1000) * imageHeight;
+        ctx.fillStyle = '#000000';
+        ctx.fillText(i.toString(), padding.left - 5, y);
+
+        ctx.beginPath();
+        ctx.moveTo(padding.left - 5, y);
+        ctx.lineTo(padding.left, y);
+        ctx.stroke();
+      }
+
+      ctx.beginPath();
+      ctx.moveTo(padding.left, imageHeight + padding.top);
+      ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
+      ctx.stroke();
+
+      ctx.textAlign = 'center';
+      ctx.textBaseline = 'top';
+      for (let i = 0; i <= 1000; i += 100) {
+        const x = padding.left + (i / 1000) * imageWidth;
+        ctx.fillStyle = '#000000';
+        ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
+
+        ctx.beginPath();
+        ctx.moveTo(x, imageHeight + padding.top);
+        ctx.lineTo(x, imageHeight + padding.top + 5);
+        ctx.stroke();
+      }
+
+      const now = new Date();
+      const timestamp = now
+        .toISOString()
+        .replace(/[-:]/g, '')
+        .replace(/\..+/, '')
+        .replace('T', '_');
+      const outputFilename = `detected_objects_${timestamp}.png`;
+      const outputPath = join(process.cwd(), outputFilename);
+
+      console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
+      const pngBuffer = await canvas.toBuffer('png');
+      console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
+      await writeFile(outputPath, pngBuffer);
+
+      console.log('[detect-object-and-draw] Image saved successfully!');
+      return c.json<TDetectObjectsResponse>(detectedObjects);
+    } catch (error) {
+      console.error('Object detection and drawing failed:', error);
+
+      if (error instanceof AppError) {
+        throw error;
+      }
+
+      throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
+        message: 'Failed to detect objects and draw',
+        userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
+      });
+    }
+  });
diff --git a/apps/remix/server/api/ai.types.ts b/apps/remix/server/api/ai.types.ts
index 1293d6c10..f5715ae8e 100644
--- a/apps/remix/server/api/ai.types.ts
+++ b/apps/remix/server/api/ai.types.ts
@@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer<typeof ZDetectObjectsRequestSchema>;
 export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;
 
 export const ZDetectObjectsAndDrawRequestSchema = z.object({
-  imagePath: z.string().min(1, 'Image path is required'),
-});
-
-export const ZDetectObjectsAndDrawResponseSchema = z.object({
-  outputPath: z.string().describe('Path to the generated image with bounding boxes'),
-  detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'),
+  image: z.instanceof(Blob, { message: 'Image file is required' }),
 });
 
 export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
-export type TDetectObjectsAndDrawResponse = z.infer<typeof ZDetectObjectsAndDrawResponseSchema>;
diff --git a/packages/lib/client-only/utils/page-canvas-registry.ts b/packages/lib/client-only/utils/page-canvas-registry.ts
new file mode 100644
index 000000000..a96b06432
--- /dev/null
+++ b/packages/lib/client-only/utils/page-canvas-registry.ts
@@ -0,0 +1,110 @@
+import type Konva from 'konva';
+
+/**
+ * Represents canvas references for a specific PDF page.
+ */
+export interface PageCanvasRefs {
+  /** The page number (1-indexed); also the key this entry is stored under in the registry */
+  pageNumber: number;
+  /** The canvas element containing the rendered PDF; its size sets the composite size */
+  pdfCanvas: HTMLCanvasElement;
+  /** The Konva stage containing field overlays; drawn over `pdfCanvas` when compositing */
+  konvaStage: Konva.Stage;
+}
+
+/**
+ * Module-level registry to store canvas references, keyed by page number alone, so any
+ * component can access page canvases without prop drilling. Re-registering a page replaces it.
+ */
+const pageCanvasRegistry = new Map<number, PageCanvasRefs>();
+
+/**
+ * Store `refs` in the registry under `refs.pageNumber`, replacing any existing entry for it.
+ *
+ * @param refs - The canvas references to register
+ */
+export const registerPageCanvas = (refs: PageCanvasRefs): void => {
+  const { pageNumber } = refs;
+  pageCanvasRegistry.set(pageNumber, refs);
+};
+
+/**
+ * Unregister a page's canvas references.
+ * Call this when a page renderer unmounts to prevent memory leaks.
+ *
+ * @param pageNumber - The page number to unregister; unknown page numbers are ignored
+ */
+export const unregisterPageCanvas = (pageNumber: number): void => {
+  pageCanvasRegistry.delete(pageNumber);
+};
+
+/**
+ * Look up the canvas references registered for a page.
+ * This is a plain registry read; it never creates an entry.
+ *
+ * @param pageNumber - The page number to retrieve
+ * @returns The canvas references, or undefined if not registered
+ */
+export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined =>
+  pageCanvasRegistry.get(pageNumber);
+
+/**
+ * List every page number currently in the registry.
+ * @returns A new array of the registered page numbers in ascending numeric order
+ */
+export const getRegisteredPageNumbers = (): number[] => {
+  const pageNumbers = [...pageCanvasRegistry.keys()];
+  return pageNumbers.sort((left, right) => left - right);
+};
+
+/**
+ * Composite a PDF page with its field overlays into a single PNG Blob.
+ * This creates a temporary canvas, draws the PDF canvas first (background),
+ * then draws the Konva canvas on top (field overlays).
+ *
+ * @param pageNumber - The page number to composite (1-indexed)
+ * @returns Promise resolving to a PNG Blob, or to null (never rejecting) on any failure
+ */
+export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
+  const refs = getPageCanvasRefs(pageNumber);
+  if (!refs) {
+    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
+    return null;
+  }
+
+  try {
+    // Create temporary canvas with same dimensions as PDF canvas
+    const tempCanvas = document.createElement('canvas');
+    tempCanvas.width = refs.pdfCanvas.width;
+    tempCanvas.height = refs.pdfCanvas.height;
+
+    const ctx = tempCanvas.getContext('2d');
+    if (!ctx) {
+      console.error('Failed to get 2D context for temporary canvas');
+      return null;
+    }
+
+    // Draw PDF canvas first (background layer)
+    ctx.drawImage(refs.pdfCanvas, 0, 0);
+
+    // Draw the Konva overlay on top; toCanvas() returns a new canvas with all layers rendered
+    // NOTE(review): drawn unscaled, so this assumes the stage matches the PDF canvas size
+    const konvaCanvas = refs.konvaStage.toCanvas();
+    ctx.drawImage(konvaCanvas, 0, 0);
+
+    // Convert to PNG Blob. Awaited here so that a failed conversion is handled by the catch
+    // below (returning null) instead of surfacing as a rejected promise to the caller.
+    return await new Promise<Blob>((resolve, reject) => {
+      tempCanvas.toBlob((blob) => {
+        if (blob) {
+          resolve(blob);
+        } else {
+          reject(new Error('Failed to convert canvas to blob'));
+        }
+      }, 'image/png');
+    });
+  } catch (error) {
+    console.error(`Error compositing page ${pageNumber}:`, error);
+    return null;
+  }
+};
diff --git a/packages/lib/universal/field-renderer/field-constants.ts b/packages/lib/universal/field-renderer/field-constants.ts
new file mode 100644
index 000000000..4473b439e
--- /dev/null
+++ b/packages/lib/universal/field-renderer/field-constants.ts
@@ -0,0 +1,19 @@
+/**
+ * Shared constants for field dimension enforcement.
+ *
+ * These constants ensure consistency between:
+ * 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
+ * 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
+ */
+
+/**
+ * Minimum field height in pixels.
+ * Callers are expected to expand smaller fields to this height; nothing is enforced here.
+ */
+export const MIN_FIELD_HEIGHT_PX = 30;
+
+/**
+ * Minimum field width in pixels.
+ * Callers are expected to expand smaller fields to this width; nothing is enforced here.
+ */
+export const MIN_FIELD_WIDTH_PX = 36;