mirror of
https://github.com/documenso/documenso.git
synced 2025-11-14 00:32:43 +10:00
feat: add AI field auto-placement with canvas registry
This commit is contained in:
@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi
|
||||
import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
|
||||
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
||||
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
||||
import {
|
||||
registerPageCanvas,
|
||||
unregisterPageCanvas,
|
||||
} from '@documenso/lib/client-only/utils/page-canvas-registry';
|
||||
import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
|
||||
import {
|
||||
MIN_FIELD_HEIGHT_PX,
|
||||
@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
|
||||
[editorFields.localFields, pageContext.pageNumber],
|
||||
);
|
||||
|
||||
/**
|
||||
* Cleanup: Unregister canvas when component unmounts
|
||||
*/
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
unregisterPageCanvas(pageContext.pageNumber);
|
||||
};
|
||||
}, [pageContext.pageNumber]);
|
||||
|
||||
const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
|
||||
const { current: container } = canvasElement;
|
||||
|
||||
@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
|
||||
currentStage.on('transformend', () => setIsFieldChanging(false));
|
||||
|
||||
currentPageLayer.batchDraw();
|
||||
|
||||
// Register this page's canvas references now that everything is initialized
|
||||
if (canvasElement.current && currentStage) {
|
||||
registerPageCanvas({
|
||||
pageNumber: pageContext.pageNumber,
|
||||
pdfCanvas: canvasElement.current,
|
||||
konvaStage: currentStage,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { lazy, useEffect, useMemo } from 'react';
|
||||
import { lazy, useEffect, useMemo, useState } from 'react';
|
||||
|
||||
import type { MessageDescriptor } from '@lingui/core';
|
||||
import { msg } from '@lingui/core/macro';
|
||||
@ -11,6 +11,10 @@ import { match } from 'ts-pattern';
|
||||
|
||||
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
||||
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
||||
import {
|
||||
compositePageToBlob,
|
||||
getPageCanvasRefs,
|
||||
} from '@documenso/lib/client-only/utils/page-canvas-registry';
|
||||
import type {
|
||||
TCheckboxFieldMeta,
|
||||
TDateFieldMeta,
|
||||
@ -24,12 +28,15 @@ import type {
|
||||
TSignatureFieldMeta,
|
||||
TTextFieldMeta,
|
||||
} from '@documenso/lib/types/field-meta';
|
||||
import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
|
||||
import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
|
||||
import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
|
||||
import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
|
||||
import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
|
||||
import { Button } from '@documenso/ui/primitives/button';
|
||||
import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
|
||||
import { Separator } from '@documenso/ui/primitives/separator';
|
||||
import { useToast } from '@documenso/ui/primitives/use-toast';
|
||||
|
||||
import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
|
||||
import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
|
||||
@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy(
|
||||
async () => import('./envelope-editor-fields-page-renderer'),
|
||||
);
|
||||
|
||||
/**
 * Expands one axis of a field (position + size, both in page percent) so that it
 * measures at least `minSizePx` pixels, keeping it centered on its original midpoint.
 *
 * @param position - Start of the field on this axis as a percentage (0-100)
 * @param size - Extent of the field on this axis as a percentage (0-100)
 * @param pageSizePx - Page extent on this axis in pixels (must be finite and > 0)
 * @param minSizePx - Minimum allowed field extent in pixels
 * @returns The (possibly adjusted) position and size, still as percentages
 */
const expandAxisToMinimum = (
  position: number,
  size: number,
  pageSizePx: number,
  minSizePx: number,
): { position: number; size: number } => {
  const sizePx = (size / 100) * pageSizePx;

  // Already large enough: leave the axis untouched.
  if (!(sizePx < minSizePx)) {
    return { position, size };
  }

  // A field can never be larger than the page itself, so cap the target at the page
  // size. Without the cap a tiny page would yield a size above 100% and a negative
  // position after clamping.
  const targetSizePx = Math.min(minSizePx, pageSizePx);

  const centerPx = (position / 100) * pageSizePx + sizePx / 2;
  const expandedSize = (targetSizePx / pageSizePx) * 100;
  let expandedPosition = ((centerPx - targetSizePx / 2) / pageSizePx) * 100;

  // Keep the expanded field inside the page.
  if (expandedPosition < 0) {
    expandedPosition = 0;
  } else if (expandedPosition + expandedSize > 100) {
    expandedPosition = 100 - expandedSize;
  }

  return { position: expandedPosition, size: expandedSize };
};

/**
 * Enforces minimum field dimensions and centers the field when expanding to meet minimums.
 *
 * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures
 * fields meet minimum usability requirements by expanding them to at least 30px height and
 * 36px width, while keeping them centered on their original position. The expanded field is
 * clamped so it stays within the page (0-100%).
 *
 * If the page size is zero, negative or not finite, the percentages cannot be converted to
 * pixels, so the field is returned unchanged rather than producing NaN/Infinity values.
 *
 * @param params - Field dimensions and page size
 * @param params.positionX - Field X position as percentage (0-100)
 * @param params.positionY - Field Y position as percentage (0-100)
 * @param params.width - Field width as percentage (0-100)
 * @param params.height - Field height as percentage (0-100)
 * @param params.pageWidth - Page width in pixels
 * @param params.pageHeight - Page height in pixels
 * @returns Adjusted field dimensions with minimums enforced and centered
 *
 * @example
 * // AI detected a thin line: 0.3% height
 * const adjusted = enforceMinimumFieldDimensions({
 *   positionX: 20, positionY: 50, width: 30, height: 0.3,
 *   pageWidth: 800, pageHeight: 1100
 * });
 * // Result: height expanded to ~2.7% (30px), centered on original position
 */
const enforceMinimumFieldDimensions = (params: {
  positionX: number;
  positionY: number;
  width: number;
  height: number;
  pageWidth: number;
  pageHeight: number;
}): {
  positionX: number;
  positionY: number;
  width: number;
  height: number;
} => {
  // NOTE(review): these duplicate MIN_FIELD_HEIGHT_PX / MIN_FIELD_WIDTH_PX from
  // field-constants.ts (same values); consider importing the shared constants.
  const MIN_HEIGHT_PX = 30;
  const MIN_WIDTH_PX = 36;

  const { positionX, positionY, width, height, pageWidth, pageHeight } = params;

  const hasUsablePageSize =
    Number.isFinite(pageWidth) && Number.isFinite(pageHeight) && pageWidth > 0 && pageHeight > 0;

  if (!hasUsablePageSize) {
    return { positionX, positionY, width, height };
  }

  const horizontal = expandAxisToMinimum(positionX, width, pageWidth, MIN_WIDTH_PX);
  const vertical = expandAxisToMinimum(positionY, height, pageHeight, MIN_HEIGHT_PX);

  return {
    positionX: horizontal.position,
    positionY: vertical.position,
    width: horizontal.size,
    height: vertical.size,
  };
};
|
||||
|
||||
const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
|
||||
[FieldType.SIGNATURE]: msg`Signature Settings`,
|
||||
[FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
|
||||
@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => {
|
||||
const { currentEnvelopeItem } = useCurrentEnvelopeRender();
|
||||
|
||||
const { t } = useLingui();
|
||||
const { toast } = useToast();
|
||||
|
||||
const [isAutoAddingFields, setIsAutoAddingFields] = useState(false);
|
||||
|
||||
const selectedField = useMemo(
|
||||
() => structuredClone(editorFields.selectedField),
|
||||
@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => {
|
||||
selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
|
||||
selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
|
||||
/>
|
||||
|
||||
<Button
|
||||
className="mt-4 w-full"
|
||||
variant="outline"
|
||||
disabled={isAutoAddingFields}
|
||||
onClick={async () => {
|
||||
setIsAutoAddingFields(true);
|
||||
|
||||
try {
|
||||
const blob = await compositePageToBlob(1);
|
||||
|
||||
if (!blob) {
|
||||
toast({
|
||||
title: t`Error`,
|
||||
description: t`Failed to capture page. Please ensure the document is fully loaded.`,
|
||||
variant: 'destructive',
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('Successfully captured page 1 as PNG Blob:', {
|
||||
size: `${(blob.size / 1024).toFixed(2)} KB`,
|
||||
type: blob.type,
|
||||
});
|
||||
console.log('Blob object:', blob);
|
||||
|
||||
console.log('[Auto Add Fields] Sending image to AI endpoint...');
|
||||
const formData = new FormData();
|
||||
formData.append('image', blob, 'page-1.png');
|
||||
|
||||
const response = await fetch('/api/ai/detect-object-and-draw', {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
credentials: 'include',
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`AI detection failed: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const detectedFields = await response.json();
|
||||
console.log(
|
||||
`[Auto Add Fields] Detected ${detectedFields.length} fields:`,
|
||||
detectedFields,
|
||||
);
|
||||
|
||||
if (!editorFields.selectedRecipient || !currentEnvelopeItem) {
|
||||
toast({
|
||||
title: t`Warning`,
|
||||
description: t`Please select a recipient before adding fields.`,
|
||||
variant: 'destructive',
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const pageCanvasRefs = getPageCanvasRefs(1);
|
||||
if (!pageCanvasRefs) {
|
||||
console.warn(
|
||||
'[Auto Add Fields] Could not get page dimensions for minimum field enforcement',
|
||||
);
|
||||
}
|
||||
|
||||
let addedCount = 0;
|
||||
for (const detected of detectedFields) {
|
||||
const [ymin, xmin, ymax, xmax] = detected.box_2d;
|
||||
let positionX = (xmin / 1000) * 100;
|
||||
let positionY = (ymin / 1000) * 100;
|
||||
let width = ((xmax - xmin) / 1000) * 100;
|
||||
let height = ((ymax - ymin) / 1000) * 100;
|
||||
|
||||
if (pageCanvasRefs) {
|
||||
const adjusted = enforceMinimumFieldDimensions({
|
||||
positionX,
|
||||
positionY,
|
||||
width,
|
||||
height,
|
||||
pageWidth: pageCanvasRefs.pdfCanvas.width,
|
||||
pageHeight: pageCanvasRefs.pdfCanvas.height,
|
||||
});
|
||||
|
||||
positionX = adjusted.positionX;
|
||||
positionY = adjusted.positionY;
|
||||
width = adjusted.width;
|
||||
height = adjusted.height;
|
||||
}
|
||||
|
||||
const fieldType = detected.label as FieldType;
|
||||
|
||||
try {
|
||||
editorFields.addField({
|
||||
envelopeItemId: currentEnvelopeItem.id,
|
||||
page: 1,
|
||||
type: fieldType,
|
||||
positionX,
|
||||
positionY,
|
||||
width,
|
||||
height,
|
||||
recipientId: editorFields.selectedRecipient.id,
|
||||
fieldMeta: structuredClone(FIELD_META_DEFAULT_VALUES[fieldType]),
|
||||
});
|
||||
addedCount++;
|
||||
} catch (error) {
|
||||
console.error(`Failed to add ${fieldType} field:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[Auto Add Fields] Successfully added ${addedCount} fields to the document`,
|
||||
);
|
||||
|
||||
toast({
|
||||
title: t`Success`,
|
||||
description: t`Added ${addedCount} fields to the document`,
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Auto add fields error:', error);
|
||||
toast({
|
||||
title: t`Error`,
|
||||
description: t`An unexpected error occurred while capturing the page.`,
|
||||
variant: 'destructive',
|
||||
});
|
||||
} finally {
|
||||
setIsAutoAddingFields(false);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{isAutoAddingFields ? <Trans>Processing...</Trans> : <Trans>Auto add fields</Trans>}
|
||||
</Button>
|
||||
</section>
|
||||
|
||||
{/* Field details section. */}
|
||||
|
||||
@ -1,12 +1,11 @@
|
||||
import { useMemo, useState } from 'react';
|
||||
|
||||
import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
|
||||
import type { DropResult } from '@hello-pangea/dnd';
|
||||
import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
|
||||
import { msg } from '@lingui/core/macro';
|
||||
import { Trans, useLingui } from '@lingui/react/macro';
|
||||
import { DocumentStatus } from '@prisma/client';
|
||||
import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react';
|
||||
import { X } from 'lucide-react';
|
||||
import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react';
|
||||
import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
|
||||
import { Link } from 'react-router';
|
||||
|
||||
|
||||
@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
|
||||
|
||||
import type { HonoEnv } from '../router';
|
||||
import {
|
||||
type TDetectObjectsAndDrawResponse,
|
||||
type TDetectObjectsResponse,
|
||||
type TGenerateTextResponse,
|
||||
ZDetectObjectsAndDrawRequestSchema,
|
||||
@ -41,6 +40,88 @@ async function resizeAndCompressImage(imageBuffer: Buffer): Promise<Buffer> {
|
||||
return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
|
||||
}
|
||||
|
||||
const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
||||
|
||||
IMPORTANT RULES:
|
||||
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
||||
2. Analyze nearby text labels to determine the field type
|
||||
3. Return bounding boxes for the fillable area only, NOT the label text
|
||||
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
||||
|
||||
FIELD TYPES TO DETECT:
|
||||
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
||||
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
||||
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
||||
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
||||
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
||||
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
||||
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
||||
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
||||
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
||||
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
||||
|
||||
DETECTION GUIDELINES:
|
||||
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
||||
- If you're uncertain which type fits best, default to TEXT
|
||||
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
||||
- Signature fields are often longer horizontal lines or larger boxes
|
||||
- Date fields often show format hints or date separators (slashes, dashes)
|
||||
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
||||
- Return coordinates for the fillable area, not the descriptive label text
|
||||
|
||||
COORDINATE SYSTEM:
|
||||
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
||||
- Top-left corner: ymin and xmin close to 0
|
||||
- Bottom-right corner: ymax and xmax close to 1000
|
||||
- Coordinates represent positions on a 1000x1000 grid overlaid on the image
|
||||
|
||||
FIELD SIZING STRATEGY FOR LINE-BASED FIELDS:
|
||||
When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields:
|
||||
1. Analyze the visual context around the detected line:
|
||||
- Look at the empty space ABOVE the detected line
|
||||
- Observe the spacing to any text labels, headers, or other form elements above
|
||||
- Assess what would be a reasonable field height to make the field clearly visible when filled
|
||||
2. Expand UPWARD from the detected line to create a usable field:
|
||||
- Keep ymax (bottom) at the detected line position (the line becomes the bottom edge)
|
||||
- Extend ymin (top) upward into the available whitespace
|
||||
- Aim to use 60-80% of the clear whitespace above the line, while being reasonable
|
||||
- The expanded field should provide comfortable space for signing/writing (minimum 30 units tall)
|
||||
3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units
|
||||
4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0
|
||||
5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those
|
||||
6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400:
|
||||
- Available whitespace: 100 units
|
||||
- Use 60-80% of that: 60-80 units
|
||||
- Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field)
|
||||
- This gives comfortable signing space while respecting the form layout`;
|
||||
|
||||
/**
 * Sends an image to the Gemini model together with `detectObjectsPrompt` and returns
 * the structured detection result.
 *
 * The image is first passed through `resizeAndCompressImage`, then embedded as a
 * base64 JPEG data URL. The model output is constrained by `ZDetectObjectsResponseSchema`.
 *
 * @param imageBuffer - Raw bytes of the page image to analyze
 * @returns The object produced by the model for the response schema
 */
const runObjectDetection = async (imageBuffer: Buffer): Promise<TDetectObjectsResponse> => {
  const jpegBuffer = await resizeAndCompressImage(imageBuffer);
  const imageDataUrl = `data:image/jpeg;base64,${jpegBuffer.toString('base64')}`;

  const { object: detectedObjects } = await generateObject({
    model: google('gemini-2.5-pro'),
    schema: ZDetectObjectsResponseSchema,
    messages: [
      {
        role: 'user',
        content: [
          { type: 'image', image: imageDataUrl },
          { type: 'text', text: detectObjectsPrompt },
        ],
      },
    ],
  });

  return detectedObjects;
};
|
||||
|
||||
export const aiRoute = new Hono<HonoEnv>()
|
||||
.use(
|
||||
'*',
|
||||
@ -85,63 +166,9 @@ export const aiRoute = new Hono<HonoEnv>()
|
||||
const { imagePath } = c.req.valid('json');
|
||||
|
||||
const imageBuffer = await readFile(imagePath);
|
||||
const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
|
||||
const base64Image = compressedImageBuffer.toString('base64');
|
||||
const detectedObjects = await runObjectDetection(imageBuffer);
|
||||
|
||||
const result = await generateObject({
|
||||
model: google('gemini-2.5-pro'),
|
||||
schema: ZDetectObjectsResponseSchema,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image',
|
||||
image: `data:image/jpeg;base64,${base64Image}`,
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
||||
|
||||
IMPORTANT RULES:
|
||||
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
||||
2. Analyze nearby text labels to determine the field type
|
||||
3. Return bounding boxes for the fillable area only, NOT the label text
|
||||
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
||||
|
||||
FIELD TYPES TO DETECT:
|
||||
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
||||
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
||||
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
||||
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
||||
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
||||
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
||||
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
||||
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
||||
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
||||
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
||||
|
||||
DETECTION GUIDELINES:
|
||||
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
||||
- If you're uncertain which type fits best, default to TEXT
|
||||
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
||||
- Signature fields are often longer horizontal lines or larger boxes
|
||||
- Date fields often show format hints or date separators (slashes, dashes)
|
||||
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
||||
- Return coordinates for the fillable area, not the descriptive label text
|
||||
|
||||
COORDINATE SYSTEM:
|
||||
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
||||
- Top-left corner: ymin and xmin close to 0
|
||||
- Bottom-right corner: ymax and xmax close to 1000
|
||||
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
return c.json<TDetectObjectsResponse>(result.object);
|
||||
return c.json<TDetectObjectsResponse>(detectedObjects);
|
||||
} catch (error) {
|
||||
console.error('Object detection failed:', error);
|
||||
|
||||
@ -156,18 +183,25 @@ COORDINATE SYSTEM:
|
||||
}
|
||||
})
|
||||
|
||||
.post(
|
||||
'/detect-object-and-draw',
|
||||
sValidator('json', ZDetectObjectsAndDrawRequestSchema),
|
||||
async (c) => {
|
||||
.post('/detect-object-and-draw', async (c) => {
|
||||
try {
|
||||
await getSession(c.req.raw);
|
||||
|
||||
const { imagePath } = c.req.valid('json');
|
||||
const parsedBody = await c.req.parseBody();
|
||||
const rawImage = parsedBody.image;
|
||||
const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage;
|
||||
const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate });
|
||||
|
||||
console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`);
|
||||
if (!parsed.success) {
|
||||
throw new AppError(AppErrorCode.INVALID_REQUEST, {
|
||||
message: 'Image file is required',
|
||||
userMessage: 'Please upload a valid image file.',
|
||||
});
|
||||
}
|
||||
|
||||
const imageBuffer = await readFile(imagePath);
|
||||
const imageBlob = parsed.data.image;
|
||||
const arrayBuffer = await imageBlob.arrayBuffer();
|
||||
const imageBuffer = Buffer.from(arrayBuffer);
|
||||
const metadata = await sharp(imageBuffer).metadata();
|
||||
const imageWidth = metadata.width;
|
||||
const imageHeight = metadata.height;
|
||||
@ -184,66 +218,10 @@ COORDINATE SYSTEM:
|
||||
}
|
||||
|
||||
console.log('[detect-object-and-draw] Compressing image for Gemini API...');
|
||||
const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
|
||||
const base64Image = compressedImageBuffer.toString('base64');
|
||||
|
||||
console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
|
||||
const result = await generateObject({
|
||||
model: google('gemini-2.5-pro'),
|
||||
schema: ZDetectObjectsResponseSchema,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image',
|
||||
image: `data:image/jpeg;base64,${base64Image}`,
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
||||
|
||||
IMPORTANT RULES:
|
||||
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
||||
2. Analyze nearby text labels to determine the field type
|
||||
3. Return bounding boxes for the fillable area only, NOT the label text
|
||||
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
||||
|
||||
FIELD TYPES TO DETECT:
|
||||
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
||||
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
||||
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
||||
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
||||
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
||||
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
||||
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
||||
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
||||
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
||||
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
||||
|
||||
DETECTION GUIDELINES:
|
||||
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
||||
- If you're uncertain which type fits best, default to TEXT
|
||||
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
||||
- Signature fields are often longer horizontal lines or larger boxes
|
||||
- Date fields often show format hints or date separators (slashes, dashes)
|
||||
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
||||
- Return coordinates for the fillable area, not the descriptive label text
|
||||
|
||||
COORDINATE SYSTEM:
|
||||
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
||||
- Top-left corner: ymin and xmin close to 0
|
||||
- Bottom-right corner: ymax and xmax close to 1000
|
||||
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
const detectedObjects = await runObjectDetection(imageBuffer);
|
||||
console.log('[detect-object-and-draw] Gemini API call completed');
|
||||
|
||||
const detectedObjects = result.object;
|
||||
|
||||
console.log(
|
||||
`[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
|
||||
);
|
||||
@ -353,10 +331,7 @@ COORDINATE SYSTEM:
|
||||
await writeFile(outputPath, pngBuffer);
|
||||
|
||||
console.log('[detect-object-and-draw] Image saved successfully!');
|
||||
return c.json<TDetectObjectsAndDrawResponse>({
|
||||
outputPath,
|
||||
detectedObjects,
|
||||
});
|
||||
return c.json<TDetectObjectsResponse>(detectedObjects);
|
||||
} catch (error) {
|
||||
console.error('Object detection and drawing failed:', error);
|
||||
|
||||
@ -369,5 +344,4 @@ COORDINATE SYSTEM:
|
||||
userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
|
||||
});
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer<typeof ZDetectObjectsRequestSchema>;
|
||||
export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;
|
||||
|
||||
export const ZDetectObjectsAndDrawRequestSchema = z.object({
|
||||
imagePath: z.string().min(1, 'Image path is required'),
|
||||
});
|
||||
|
||||
export const ZDetectObjectsAndDrawResponseSchema = z.object({
|
||||
outputPath: z.string().describe('Path to the generated image with bounding boxes'),
|
||||
detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'),
|
||||
image: z.instanceof(Blob, { message: 'Image file is required' }),
|
||||
});
|
||||
|
||||
export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
|
||||
export type TDetectObjectsAndDrawResponse = z.infer<typeof ZDetectObjectsAndDrawResponseSchema>;
|
||||
|
||||
110
packages/lib/client-only/utils/page-canvas-registry.ts
Normal file
110
packages/lib/client-only/utils/page-canvas-registry.ts
Normal file
@ -0,0 +1,110 @@
|
||||
import type Konva from 'konva';
|
||||
|
||||
/**
|
||||
* Represents canvas references for a specific PDF page.
|
||||
*/
|
||||
export interface PageCanvasRefs {
|
||||
/** The page number (1-indexed) */
|
||||
pageNumber: number;
|
||||
/** The canvas element containing the rendered PDF */
|
||||
pdfCanvas: HTMLCanvasElement;
|
||||
/** The Konva stage containing field overlays */
|
||||
konvaStage: Konva.Stage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Module-level registry to store canvas references by page number.
|
||||
* This allows any component to access page canvases without prop drilling.
|
||||
*/
|
||||
const pageCanvasRegistry = new Map<number, PageCanvasRefs>();
|
||||
|
||||
/**
 * Store the canvas references for a page in the module-level registry.
 *
 * The entry is keyed by `refs.pageNumber`; registering the same page number again
 * replaces the previous entry.
 *
 * @param refs - The canvas references to register
 */
export const registerPageCanvas = (refs: PageCanvasRefs): void => {
  const { pageNumber } = refs;

  pageCanvasRegistry.set(pageNumber, refs);
};
|
||||
|
||||
/**
 * Remove a page's canvas references from the registry.
 *
 * Intended to be called when a page renderer unmounts so stale canvases are not
 * retained. Does nothing if the page number is not registered.
 *
 * @param pageNumber - The page number to unregister
 */
export const unregisterPageCanvas = (pageNumber: number): void => {
  if (pageCanvasRegistry.has(pageNumber)) {
    pageCanvasRegistry.delete(pageNumber);
  }
};
|
||||
|
||||
/**
 * Look up the canvas references registered for a page.
 *
 * @param pageNumber - The page number to retrieve
 * @returns The registered references, or undefined when the page has no entry
 */
export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined => {
  const registeredRefs = pageCanvasRegistry.get(pageNumber);

  return registeredRefs;
};
|
||||
|
||||
/**
 * List the page numbers that currently have canvas references registered.
 *
 * @returns A new array of registered page numbers in ascending numeric order
 */
export const getRegisteredPageNumbers = (): number[] => {
  const pageNumbers = [...pageCanvasRegistry.keys()];

  pageNumbers.sort((left, right) => left - right);

  return pageNumbers;
};
|
||||
|
||||
/**
 * Composite a PDF page with its field overlays into a single PNG Blob.
 *
 * Creates a temporary canvas with the same pixel dimensions as the registered PDF
 * canvas, draws the PDF canvas first (background), then draws the canvas produced by
 * the Konva stage on top (field overlays), and finally encodes the result as PNG.
 *
 * @param pageNumber - The page number to composite (1-indexed)
 * @returns Promise that resolves to a PNG Blob, or null if the page is not registered,
 *   no 2D context is available, or compositing/encoding fails. The promise does not
 *   reject for these failures; they are logged to the console instead.
 */
export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
  const refs = getPageCanvasRefs(pageNumber);

  if (!refs) {
    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
    return null;
  }

  try {
    const { pdfCanvas, konvaStage } = refs;

    // Create temporary canvas with same dimensions as PDF canvas
    const tempCanvas = document.createElement('canvas');
    tempCanvas.width = pdfCanvas.width;
    tempCanvas.height = pdfCanvas.height;

    const ctx = tempCanvas.getContext('2d');

    if (!ctx) {
      console.error('Failed to get 2D context for temporary canvas');
      return null;
    }

    // Draw PDF canvas first (background layer)
    ctx.drawImage(pdfCanvas, 0, 0);

    // Get Konva canvas and draw on top (field overlays)
    // Note: Konva's toCanvas() returns a new canvas with all layers rendered
    // NOTE(review): drawn at its natural size; this assumes the stage canvas and the
    // PDF canvas share the same pixel dimensions - confirm for scaled/high-DPI renders.
    const konvaCanvas = konvaStage.toCanvas();
    ctx.drawImage(konvaCanvas, 0, 0);

    // Convert to PNG Blob. The promise must be awaited inside the try block: returning
    // it un-awaited would let an encoding failure escape the catch below and reject the
    // caller's promise instead of resolving to null as documented.
    return await new Promise<Blob>((resolve, reject) => {
      tempCanvas.toBlob((blob) => {
        if (blob) {
          resolve(blob);
        } else {
          reject(new Error('Failed to convert canvas to blob'));
        }
      }, 'image/png');
    });
  } catch (error) {
    console.error(`Error compositing page ${pageNumber}:`, error);
    return null;
  }
};
|
||||
19
packages/lib/universal/field-renderer/field-constants.ts
Normal file
19
packages/lib/universal/field-renderer/field-constants.ts
Normal file
@ -0,0 +1,19 @@
|
||||
/**
|
||||
* Shared constants for field dimension enforcement.
|
||||
*
|
||||
* These constants ensure consistency between:
|
||||
* 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
|
||||
* 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
|
||||
*/
|
||||
|
||||
/**
|
||||
* Minimum field height in pixels.
|
||||
* Fields smaller than this will be expanded to meet minimum usability requirements.
|
||||
*/
|
||||
export const MIN_FIELD_HEIGHT_PX = 30;
|
||||
|
||||
/**
|
||||
* Minimum field width in pixels.
|
||||
* Fields smaller than this will be expanded to meet minimum usability requirements.
|
||||
*/
|
||||
export const MIN_FIELD_WIDTH_PX = 36;
|
||||
Reference in New Issue
Block a user