mirror of
https://github.com/documenso/documenso.git
synced 2025-11-14 00:32:43 +10:00
feat: add AI field auto-placement with canvas registry
This commit is contained in:
@ -11,6 +11,10 @@ import type { TLocalField } from '@documenso/lib/client-only/hooks/use-editor-fi
|
|||||||
import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
|
import { usePageRenderer } from '@documenso/lib/client-only/hooks/use-page-renderer';
|
||||||
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
||||||
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
||||||
|
import {
|
||||||
|
registerPageCanvas,
|
||||||
|
unregisterPageCanvas,
|
||||||
|
} from '@documenso/lib/client-only/utils/page-canvas-registry';
|
||||||
import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
|
import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
|
||||||
import {
|
import {
|
||||||
MIN_FIELD_HEIGHT_PX,
|
MIN_FIELD_HEIGHT_PX,
|
||||||
@ -56,6 +60,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
|
|||||||
[editorFields.localFields, pageContext.pageNumber],
|
[editorFields.localFields, pageContext.pageNumber],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup: Unregister canvas when component unmounts
|
||||||
|
*/
|
||||||
|
useEffect(() => {
|
||||||
|
return () => {
|
||||||
|
unregisterPageCanvas(pageContext.pageNumber);
|
||||||
|
};
|
||||||
|
}, [pageContext.pageNumber]);
|
||||||
|
|
||||||
const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
|
const handleResizeOrMove = (event: KonvaEventObject<Event>) => {
|
||||||
const { current: container } = canvasElement;
|
const { current: container } = canvasElement;
|
||||||
|
|
||||||
@ -214,6 +227,15 @@ export default function EnvelopeEditorFieldsPageRenderer() {
|
|||||||
currentStage.on('transformend', () => setIsFieldChanging(false));
|
currentStage.on('transformend', () => setIsFieldChanging(false));
|
||||||
|
|
||||||
currentPageLayer.batchDraw();
|
currentPageLayer.batchDraw();
|
||||||
|
|
||||||
|
// Register this page's canvas references now that everything is initialized
|
||||||
|
if (canvasElement.current && currentStage) {
|
||||||
|
registerPageCanvas({
|
||||||
|
pageNumber: pageContext.pageNumber,
|
||||||
|
pdfCanvas: canvasElement.current,
|
||||||
|
konvaStage: currentStage,
|
||||||
|
});
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import { lazy, useEffect, useMemo } from 'react';
|
import { lazy, useEffect, useMemo, useState } from 'react';
|
||||||
|
|
||||||
import type { MessageDescriptor } from '@lingui/core';
|
import type { MessageDescriptor } from '@lingui/core';
|
||||||
import { msg } from '@lingui/core/macro';
|
import { msg } from '@lingui/core/macro';
|
||||||
@ -11,6 +11,10 @@ import { match } from 'ts-pattern';
|
|||||||
|
|
||||||
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
import { useCurrentEnvelopeEditor } from '@documenso/lib/client-only/providers/envelope-editor-provider';
|
||||||
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
import { useCurrentEnvelopeRender } from '@documenso/lib/client-only/providers/envelope-render-provider';
|
||||||
|
import {
|
||||||
|
compositePageToBlob,
|
||||||
|
getPageCanvasRefs,
|
||||||
|
} from '@documenso/lib/client-only/utils/page-canvas-registry';
|
||||||
import type {
|
import type {
|
||||||
TCheckboxFieldMeta,
|
TCheckboxFieldMeta,
|
||||||
TDateFieldMeta,
|
TDateFieldMeta,
|
||||||
@ -24,12 +28,15 @@ import type {
|
|||||||
TSignatureFieldMeta,
|
TSignatureFieldMeta,
|
||||||
TTextFieldMeta,
|
TTextFieldMeta,
|
||||||
} from '@documenso/lib/types/field-meta';
|
} from '@documenso/lib/types/field-meta';
|
||||||
|
import { FIELD_META_DEFAULT_VALUES } from '@documenso/lib/types/field-meta';
|
||||||
import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
|
import { canRecipientFieldsBeModified } from '@documenso/lib/utils/recipients';
|
||||||
import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
|
import { AnimateGenericFadeInOut } from '@documenso/ui/components/animate/animate-generic-fade-in-out';
|
||||||
import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
|
import PDFViewerKonvaLazy from '@documenso/ui/components/pdf-viewer/pdf-viewer-konva-lazy';
|
||||||
import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
|
import { Alert, AlertDescription } from '@documenso/ui/primitives/alert';
|
||||||
|
import { Button } from '@documenso/ui/primitives/button';
|
||||||
import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
|
import { RecipientSelector } from '@documenso/ui/primitives/recipient-selector';
|
||||||
import { Separator } from '@documenso/ui/primitives/separator';
|
import { Separator } from '@documenso/ui/primitives/separator';
|
||||||
|
import { useToast } from '@documenso/ui/primitives/use-toast';
|
||||||
|
|
||||||
import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
|
import { EditorFieldCheckboxForm } from '~/components/forms/editor/editor-field-checkbox-form';
|
||||||
import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
|
import { EditorFieldDateForm } from '~/components/forms/editor/editor-field-date-form';
|
||||||
@ -49,6 +56,94 @@ const EnvelopeEditorFieldsPageRenderer = lazy(
|
|||||||
async () => import('./envelope-editor-fields-page-renderer'),
|
async () => import('./envelope-editor-fields-page-renderer'),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
 * Expands a single axis (horizontal or vertical) of a field so that it spans at
 * least `minSizePx` pixels, keeping the field centered on its original midpoint.
 *
 * @param position - Start of the field on this axis, as a percentage of the page (0-100)
 * @param size - Extent of the field on this axis, as a percentage of the page (0-100)
 * @param pageSizePx - Page extent on this axis, in pixels
 * @param minSizePx - Minimum extent the field must have on this axis, in pixels
 * @returns The (possibly adjusted) position and size, both as percentages
 */
const expandAxisToMinimum = (
  position: number,
  size: number,
  pageSizePx: number,
  minSizePx: number,
): { position: number; size: number } => {
  // Without a usable page size the percentage <-> pixel conversion is meaningless
  // (and would divide by zero), so leave the axis exactly as detected.
  if (!Number.isFinite(pageSizePx) || pageSizePx <= 0) {
    return { position, size };
  }

  const sizePx = (size / 100) * pageSizePx;

  // Already large enough (or not comparable, e.g. NaN input): nothing to adjust.
  if (!(sizePx < minSizePx)) {
    return { position, size };
  }

  const centerPx = (position / 100) * pageSizePx + sizePx / 2;

  // Never grow beyond the page itself, otherwise the size would exceed 100%.
  const targetPx = Math.min(minSizePx, pageSizePx);
  const expandedSize = (targetPx / pageSizePx) * 100;

  let expandedPosition = ((centerPx - targetPx / 2) / pageSizePx) * 100;

  // Keep the expanded field fully on the page.
  if (expandedPosition < 0) {
    expandedPosition = 0;
  } else if (expandedPosition + expandedSize > 100) {
    expandedPosition = 100 - expandedSize;
  }

  return { position: expandedPosition, size: expandedSize };
};

/**
 * Enforces minimum field dimensions and centers the field when expanding to meet minimums.
 *
 * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures
 * fields meet minimum usability requirements by expanding them to at least 30px height and
 * 36px width, while keeping them centered on their original position. After expansion the
 * field is shifted, if necessary, so that it stays within the 0-100% page bounds.
 *
 * Axes that already meet the minimum are returned untouched. If a page dimension is zero,
 * negative or not finite, the corresponding axis is also returned untouched. If the page is
 * smaller than the minimum, the field is expanded to fill the page (100%) on that axis.
 *
 * @param params - Field dimensions and page size
 * @param params.positionX - Field X position as percentage (0-100)
 * @param params.positionY - Field Y position as percentage (0-100)
 * @param params.width - Field width as percentage (0-100)
 * @param params.height - Field height as percentage (0-100)
 * @param params.pageWidth - Page width in pixels
 * @param params.pageHeight - Page height in pixels
 * @returns Adjusted field dimensions with minimums enforced and centered
 *
 * @example
 * // AI detected a thin line: 0.3% height
 * const adjusted = enforceMinimumFieldDimensions({
 *   positionX: 20, positionY: 50, width: 30, height: 0.3,
 *   pageWidth: 800, pageHeight: 1100
 * });
 * // Result: height expanded to ~2.7% (30px), centered on original position
 */
const enforceMinimumFieldDimensions = (params: {
  positionX: number;
  positionY: number;
  width: number;
  height: number;
  pageWidth: number;
  pageHeight: number;
}): {
  positionX: number;
  positionY: number;
  width: number;
  height: number;
} => {
  const MIN_HEIGHT_PX = 30;
  const MIN_WIDTH_PX = 36;

  const horizontal = expandAxisToMinimum(
    params.positionX,
    params.width,
    params.pageWidth,
    MIN_WIDTH_PX,
  );

  const vertical = expandAxisToMinimum(
    params.positionY,
    params.height,
    params.pageHeight,
    MIN_HEIGHT_PX,
  );

  return {
    positionX: horizontal.position,
    positionY: vertical.position,
    width: horizontal.size,
    height: vertical.size,
  };
};
|
||||||
|
|
||||||
const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
|
const FieldSettingsTypeTranslations: Record<FieldType, MessageDescriptor> = {
|
||||||
[FieldType.SIGNATURE]: msg`Signature Settings`,
|
[FieldType.SIGNATURE]: msg`Signature Settings`,
|
||||||
[FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
|
[FieldType.FREE_SIGNATURE]: msg`Free Signature Settings`,
|
||||||
@ -69,6 +164,9 @@ export const EnvelopeEditorFieldsPage = () => {
|
|||||||
const { currentEnvelopeItem } = useCurrentEnvelopeRender();
|
const { currentEnvelopeItem } = useCurrentEnvelopeRender();
|
||||||
|
|
||||||
const { t } = useLingui();
|
const { t } = useLingui();
|
||||||
|
const { toast } = useToast();
|
||||||
|
|
||||||
|
const [isAutoAddingFields, setIsAutoAddingFields] = useState(false);
|
||||||
|
|
||||||
const selectedField = useMemo(
|
const selectedField = useMemo(
|
||||||
() => structuredClone(editorFields.selectedField),
|
() => structuredClone(editorFields.selectedField),
|
||||||
@ -187,6 +285,134 @@ export const EnvelopeEditorFieldsPage = () => {
|
|||||||
selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
|
selectedRecipientId={editorFields.selectedRecipient?.id ?? null}
|
||||||
selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
|
selectedEnvelopeItemId={currentEnvelopeItem?.id ?? null}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<Button
|
||||||
|
className="mt-4 w-full"
|
||||||
|
variant="outline"
|
||||||
|
disabled={isAutoAddingFields}
|
||||||
|
onClick={async () => {
|
||||||
|
setIsAutoAddingFields(true);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const blob = await compositePageToBlob(1);
|
||||||
|
|
||||||
|
if (!blob) {
|
||||||
|
toast({
|
||||||
|
title: t`Error`,
|
||||||
|
description: t`Failed to capture page. Please ensure the document is fully loaded.`,
|
||||||
|
variant: 'destructive',
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('Successfully captured page 1 as PNG Blob:', {
|
||||||
|
size: `${(blob.size / 1024).toFixed(2)} KB`,
|
||||||
|
type: blob.type,
|
||||||
|
});
|
||||||
|
console.log('Blob object:', blob);
|
||||||
|
|
||||||
|
console.log('[Auto Add Fields] Sending image to AI endpoint...');
|
||||||
|
const formData = new FormData();
|
||||||
|
formData.append('image', blob, 'page-1.png');
|
||||||
|
|
||||||
|
const response = await fetch('/api/ai/detect-object-and-draw', {
|
||||||
|
method: 'POST',
|
||||||
|
body: formData,
|
||||||
|
credentials: 'include',
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`AI detection failed: ${response.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const detectedFields = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[Auto Add Fields] Detected ${detectedFields.length} fields:`,
|
||||||
|
detectedFields,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!editorFields.selectedRecipient || !currentEnvelopeItem) {
|
||||||
|
toast({
|
||||||
|
title: t`Warning`,
|
||||||
|
description: t`Please select a recipient before adding fields.`,
|
||||||
|
variant: 'destructive',
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pageCanvasRefs = getPageCanvasRefs(1);
|
||||||
|
if (!pageCanvasRefs) {
|
||||||
|
console.warn(
|
||||||
|
'[Auto Add Fields] Could not get page dimensions for minimum field enforcement',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let addedCount = 0;
|
||||||
|
for (const detected of detectedFields) {
|
||||||
|
const [ymin, xmin, ymax, xmax] = detected.box_2d;
|
||||||
|
let positionX = (xmin / 1000) * 100;
|
||||||
|
let positionY = (ymin / 1000) * 100;
|
||||||
|
let width = ((xmax - xmin) / 1000) * 100;
|
||||||
|
let height = ((ymax - ymin) / 1000) * 100;
|
||||||
|
|
||||||
|
if (pageCanvasRefs) {
|
||||||
|
const adjusted = enforceMinimumFieldDimensions({
|
||||||
|
positionX,
|
||||||
|
positionY,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
pageWidth: pageCanvasRefs.pdfCanvas.width,
|
||||||
|
pageHeight: pageCanvasRefs.pdfCanvas.height,
|
||||||
|
});
|
||||||
|
|
||||||
|
positionX = adjusted.positionX;
|
||||||
|
positionY = adjusted.positionY;
|
||||||
|
width = adjusted.width;
|
||||||
|
height = adjusted.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
const fieldType = detected.label as FieldType;
|
||||||
|
|
||||||
|
try {
|
||||||
|
editorFields.addField({
|
||||||
|
envelopeItemId: currentEnvelopeItem.id,
|
||||||
|
page: 1,
|
||||||
|
type: fieldType,
|
||||||
|
positionX,
|
||||||
|
positionY,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
recipientId: editorFields.selectedRecipient.id,
|
||||||
|
fieldMeta: structuredClone(FIELD_META_DEFAULT_VALUES[fieldType]),
|
||||||
|
});
|
||||||
|
addedCount++;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Failed to add ${fieldType} field:`, error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[Auto Add Fields] Successfully added ${addedCount} fields to the document`,
|
||||||
|
);
|
||||||
|
|
||||||
|
toast({
|
||||||
|
title: t`Success`,
|
||||||
|
description: t`Added ${addedCount} fields to the document`,
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Auto add fields error:', error);
|
||||||
|
toast({
|
||||||
|
title: t`Error`,
|
||||||
|
description: t`An unexpected error occurred while capturing the page.`,
|
||||||
|
variant: 'destructive',
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
setIsAutoAddingFields(false);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{isAutoAddingFields ? <Trans>Processing...</Trans> : <Trans>Auto add fields</Trans>}
|
||||||
|
</Button>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
{/* Field details section. */}
|
{/* Field details section. */}
|
||||||
|
|||||||
@ -1,12 +1,11 @@
|
|||||||
import { useMemo, useState } from 'react';
|
import { useMemo, useState } from 'react';
|
||||||
|
|
||||||
import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
|
|
||||||
import type { DropResult } from '@hello-pangea/dnd';
|
import type { DropResult } from '@hello-pangea/dnd';
|
||||||
|
import { DragDropContext, Draggable, Droppable } from '@hello-pangea/dnd';
|
||||||
import { msg } from '@lingui/core/macro';
|
import { msg } from '@lingui/core/macro';
|
||||||
import { Trans, useLingui } from '@lingui/react/macro';
|
import { Trans, useLingui } from '@lingui/react/macro';
|
||||||
import { DocumentStatus } from '@prisma/client';
|
import { DocumentStatus } from '@prisma/client';
|
||||||
import { FileWarningIcon, GripVerticalIcon, Loader2 } from 'lucide-react';
|
import { FileWarningIcon, GripVerticalIcon, Loader2, X } from 'lucide-react';
|
||||||
import { X } from 'lucide-react';
|
|
||||||
import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
|
import { ErrorCode as DropzoneErrorCode, type FileRejection } from 'react-dropzone';
|
||||||
import { Link } from 'react-router';
|
import { Link } from 'react-router';
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,6 @@ import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error';
|
|||||||
|
|
||||||
import type { HonoEnv } from '../router';
|
import type { HonoEnv } from '../router';
|
||||||
import {
|
import {
|
||||||
type TDetectObjectsAndDrawResponse,
|
|
||||||
type TDetectObjectsResponse,
|
type TDetectObjectsResponse,
|
||||||
type TGenerateTextResponse,
|
type TGenerateTextResponse,
|
||||||
ZDetectObjectsAndDrawRequestSchema,
|
ZDetectObjectsAndDrawRequestSchema,
|
||||||
@ -41,6 +40,88 @@ async function resizeAndCompressImage(imageBuffer: Buffer): Promise<Buffer> {
|
|||||||
return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
|
return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
||||||
|
|
||||||
|
IMPORTANT RULES:
|
||||||
|
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
||||||
|
2. Analyze nearby text labels to determine the field type
|
||||||
|
3. Return bounding boxes for the fillable area only, NOT the label text
|
||||||
|
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
||||||
|
|
||||||
|
FIELD TYPES TO DETECT:
|
||||||
|
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
||||||
|
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
||||||
|
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
||||||
|
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
||||||
|
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
||||||
|
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
||||||
|
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
||||||
|
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
||||||
|
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
||||||
|
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
||||||
|
|
||||||
|
DETECTION GUIDELINES:
|
||||||
|
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
||||||
|
- If you're uncertain which type fits best, default to TEXT
|
||||||
|
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
||||||
|
- Signature fields are often longer horizontal lines or larger boxes
|
||||||
|
- Date fields often show format hints or date separators (slashes, dashes)
|
||||||
|
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
||||||
|
- Return coordinates for the fillable area, not the descriptive label text
|
||||||
|
|
||||||
|
COORDINATE SYSTEM:
|
||||||
|
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
||||||
|
- Top-left corner: ymin and xmin close to 0
|
||||||
|
- Bottom-right corner: ymax and xmax close to 1000
|
||||||
|
- Coordinates represent positions on a 1000x1000 grid overlaid on the image
|
||||||
|
|
||||||
|
FIELD SIZING STRATEGY FOR LINE-BASED FIELDS:
|
||||||
|
When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields:
|
||||||
|
1. Analyze the visual context around the detected line:
|
||||||
|
- Look at the empty space ABOVE the detected line
|
||||||
|
- Observe the spacing to any text labels, headers, or other form elements above
|
||||||
|
- Assess what would be a reasonable field height to make the field clearly visible when filled
|
||||||
|
2. Expand UPWARD from the detected line to create a usable field:
|
||||||
|
- Keep ymax (bottom) at the detected line position (the line becomes the bottom edge)
|
||||||
|
- Extend ymin (top) upward into the available whitespace
|
||||||
|
- Aim to use 60-80% of the clear whitespace above the line, while being reasonable
|
||||||
|
- The expanded field should provide comfortable space for signing/writing (minimum 30 units tall)
|
||||||
|
3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units
|
||||||
|
4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0
|
||||||
|
5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those
|
||||||
|
6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400:
|
||||||
|
- Available whitespace: 100 units
|
||||||
|
- Use 60-80% of that: 60-80 units
|
||||||
|
- Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field)
|
||||||
|
- This gives comfortable signing space while respecting the form layout`;
|
||||||
|
|
||||||
|
/**
 * Runs form-field detection on a single page image.
 *
 * The image is first passed through `resizeAndCompressImage`, base64-encoded and sent
 * as a JPEG data URL together with `detectObjectsPrompt` in a single user message to
 * the `gemini-2.5-pro` model. The structured output is requested against
 * `ZDetectObjectsResponseSchema`.
 *
 * @param imageBuffer - Raw image bytes of the page to analyse
 * @returns The structured object produced by `generateObject`
 */
const runObjectDetection = async (imageBuffer: Buffer): Promise<TDetectObjectsResponse> => {
  const preparedImage = await resizeAndCompressImage(imageBuffer);
  const imageDataUrl = `data:image/jpeg;base64,${preparedImage.toString('base64')}`;

  const { object: detectedObjects } = await generateObject({
    model: google('gemini-2.5-pro'),
    schema: ZDetectObjectsResponseSchema,
    messages: [
      {
        role: 'user',
        content: [
          // The image comes first, followed by the detection instructions.
          { type: 'image', image: imageDataUrl },
          { type: 'text', text: detectObjectsPrompt },
        ],
      },
    ],
  });

  return detectedObjects;
};
|
||||||
|
|
||||||
export const aiRoute = new Hono<HonoEnv>()
|
export const aiRoute = new Hono<HonoEnv>()
|
||||||
.use(
|
.use(
|
||||||
'*',
|
'*',
|
||||||
@ -85,63 +166,9 @@ export const aiRoute = new Hono<HonoEnv>()
|
|||||||
const { imagePath } = c.req.valid('json');
|
const { imagePath } = c.req.valid('json');
|
||||||
|
|
||||||
const imageBuffer = await readFile(imagePath);
|
const imageBuffer = await readFile(imagePath);
|
||||||
const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
|
const detectedObjects = await runObjectDetection(imageBuffer);
|
||||||
const base64Image = compressedImageBuffer.toString('base64');
|
|
||||||
|
|
||||||
const result = await generateObject({
|
return c.json<TDetectObjectsResponse>(detectedObjects);
|
||||||
model: google('gemini-2.5-pro'),
|
|
||||||
schema: ZDetectObjectsResponseSchema,
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: 'image',
|
|
||||||
image: `data:image/jpeg;base64,${base64Image}`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
type: 'text',
|
|
||||||
text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
|
||||||
|
|
||||||
IMPORTANT RULES:
|
|
||||||
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
|
||||||
2. Analyze nearby text labels to determine the field type
|
|
||||||
3. Return bounding boxes for the fillable area only, NOT the label text
|
|
||||||
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
|
||||||
|
|
||||||
FIELD TYPES TO DETECT:
|
|
||||||
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
|
||||||
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
|
||||||
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
|
||||||
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
|
||||||
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
|
||||||
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
|
||||||
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
|
||||||
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
|
||||||
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
|
||||||
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
|
||||||
|
|
||||||
DETECTION GUIDELINES:
|
|
||||||
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
|
||||||
- If you're uncertain which type fits best, default to TEXT
|
|
||||||
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
|
||||||
- Signature fields are often longer horizontal lines or larger boxes
|
|
||||||
- Date fields often show format hints or date separators (slashes, dashes)
|
|
||||||
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
|
||||||
- Return coordinates for the fillable area, not the descriptive label text
|
|
||||||
|
|
||||||
COORDINATE SYSTEM:
|
|
||||||
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
|
||||||
- Top-left corner: ymin and xmin close to 0
|
|
||||||
- Bottom-right corner: ymax and xmax close to 1000
|
|
||||||
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
});
|
|
||||||
|
|
||||||
return c.json<TDetectObjectsResponse>(result.object);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Object detection failed:', error);
|
console.error('Object detection failed:', error);
|
||||||
|
|
||||||
@ -156,218 +183,165 @@ COORDINATE SYSTEM:
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
.post(
|
.post('/detect-object-and-draw', async (c) => {
|
||||||
'/detect-object-and-draw',
|
try {
|
||||||
sValidator('json', ZDetectObjectsAndDrawRequestSchema),
|
await getSession(c.req.raw);
|
||||||
async (c) => {
|
|
||||||
try {
|
|
||||||
await getSession(c.req.raw);
|
|
||||||
|
|
||||||
const { imagePath } = c.req.valid('json');
|
const parsedBody = await c.req.parseBody();
|
||||||
|
const rawImage = parsedBody.image;
|
||||||
|
const imageCandidate = Array.isArray(rawImage) ? rawImage[0] : rawImage;
|
||||||
|
const parsed = ZDetectObjectsAndDrawRequestSchema.safeParse({ image: imageCandidate });
|
||||||
|
|
||||||
console.log(`[detect-object-and-draw] Reading image from: ${imagePath}`);
|
if (!parsed.success) {
|
||||||
|
throw new AppError(AppErrorCode.INVALID_REQUEST, {
|
||||||
const imageBuffer = await readFile(imagePath);
|
message: 'Image file is required',
|
||||||
const metadata = await sharp(imageBuffer).metadata();
|
userMessage: 'Please upload a valid image file.',
|
||||||
const imageWidth = metadata.width;
|
|
||||||
const imageHeight = metadata.height;
|
|
||||||
|
|
||||||
console.log(
|
|
||||||
`[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!imageWidth || !imageHeight) {
|
|
||||||
throw new AppError(AppErrorCode.INVALID_REQUEST, {
|
|
||||||
message: 'Unable to extract image dimensions',
|
|
||||||
userMessage: 'The image file appears to be invalid or corrupted.',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('[detect-object-and-draw] Compressing image for Gemini API...');
|
|
||||||
const compressedImageBuffer = await resizeAndCompressImage(imageBuffer);
|
|
||||||
const base64Image = compressedImageBuffer.toString('base64');
|
|
||||||
|
|
||||||
console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
|
|
||||||
const result = await generateObject({
|
|
||||||
model: google('gemini-2.5-pro'),
|
|
||||||
schema: ZDetectObjectsResponseSchema,
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: 'image',
|
|
||||||
image: `data:image/jpeg;base64,${base64Image}`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
type: 'text',
|
|
||||||
text: `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform.
|
|
||||||
|
|
||||||
IMPORTANT RULES:
|
|
||||||
1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data)
|
|
||||||
2. Analyze nearby text labels to determine the field type
|
|
||||||
3. Return bounding boxes for the fillable area only, NOT the label text
|
|
||||||
4. Each bounding box must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale
|
|
||||||
|
|
||||||
FIELD TYPES TO DETECT:
|
|
||||||
• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____'
|
|
||||||
• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields
|
|
||||||
• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name'
|
|
||||||
• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:'
|
|
||||||
• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____'
|
|
||||||
• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes
|
|
||||||
• RADIO - Empty radio button circles (○) in groups, typically circular selection options
|
|
||||||
• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#'
|
|
||||||
• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select'
|
|
||||||
• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain
|
|
||||||
|
|
||||||
DETECTION GUIDELINES:
|
|
||||||
- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type
|
|
||||||
- If you're uncertain which type fits best, default to TEXT
|
|
||||||
- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label
|
|
||||||
- Signature fields are often longer horizontal lines or larger boxes
|
|
||||||
- Date fields often show format hints or date separators (slashes, dashes)
|
|
||||||
- Look for visual patterns: underscores (____), horizontal lines, box outlines
|
|
||||||
- Return coordinates for the fillable area, not the descriptive label text
|
|
||||||
|
|
||||||
COORDINATE SYSTEM:
|
|
||||||
- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale
|
|
||||||
- Top-left corner: ymin and xmin close to 0
|
|
||||||
- Bottom-right corner: ymax and xmax close to 1000
|
|
||||||
- Coordinates represent positions on a 1000x1000 grid overlaid on the image`,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
});
|
|
||||||
console.log('[detect-object-and-draw] Gemini API call completed');
|
|
||||||
|
|
||||||
const detectedObjects = result.object;
|
|
||||||
|
|
||||||
console.log(
|
|
||||||
`[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
|
|
||||||
);
|
|
||||||
|
|
||||||
const padding = { left: 80, top: 20, right: 20, bottom: 40 };
|
|
||||||
const canvas = new Canvas(
|
|
||||||
imageWidth + padding.left + padding.right,
|
|
||||||
imageHeight + padding.top + padding.bottom,
|
|
||||||
);
|
|
||||||
const ctx = canvas.getContext('2d');
|
|
||||||
|
|
||||||
const img = new Image();
|
|
||||||
img.src = imageBuffer;
|
|
||||||
ctx.drawImage(img, padding.left, padding.top);
|
|
||||||
|
|
||||||
ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
|
|
||||||
ctx.lineWidth = 1;
|
|
||||||
|
|
||||||
for (let i = 0; i <= 1000; i += 100) {
|
|
||||||
const x = padding.left + (i / 1000) * imageWidth;
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(x, padding.top);
|
|
||||||
ctx.lineTo(x, imageHeight + padding.top);
|
|
||||||
ctx.stroke();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Horizontal grid lines (every 100 units on 0-1000 scale)
|
|
||||||
for (let i = 0; i <= 1000; i += 100) {
|
|
||||||
const y = padding.top + (i / 1000) * imageHeight;
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(padding.left, y);
|
|
||||||
ctx.lineTo(imageWidth + padding.left, y);
|
|
||||||
ctx.stroke();
|
|
||||||
}
|
|
||||||
|
|
||||||
const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
|
|
||||||
|
|
||||||
detectedObjects.forEach((obj, index) => {
|
|
||||||
const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
|
|
||||||
|
|
||||||
const x = xmin * imageWidth + padding.left;
|
|
||||||
const y = ymin * imageHeight + padding.top;
|
|
||||||
const width = (xmax - xmin) * imageWidth;
|
|
||||||
const height = (ymax - ymin) * imageHeight;
|
|
||||||
|
|
||||||
ctx.strokeStyle = colors[index % colors.length];
|
|
||||||
ctx.lineWidth = 5;
|
|
||||||
ctx.strokeRect(x, y, width, height);
|
|
||||||
|
|
||||||
ctx.fillStyle = colors[index % colors.length];
|
|
||||||
ctx.font = '20px Arial';
|
|
||||||
ctx.fillText(obj.label, x, y - 5);
|
|
||||||
});
|
|
||||||
|
|
||||||
ctx.strokeStyle = '#000000';
|
|
||||||
ctx.lineWidth = 1;
|
|
||||||
ctx.font = '26px Arial';
|
|
||||||
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(padding.left, padding.top);
|
|
||||||
ctx.lineTo(padding.left, imageHeight + padding.top);
|
|
||||||
ctx.stroke();
|
|
||||||
|
|
||||||
ctx.textAlign = 'right';
|
|
||||||
ctx.textBaseline = 'middle';
|
|
||||||
for (let i = 0; i <= 1000; i += 100) {
|
|
||||||
const y = padding.top + (i / 1000) * imageHeight;
|
|
||||||
ctx.fillStyle = '#000000';
|
|
||||||
ctx.fillText(i.toString(), padding.left - 5, y);
|
|
||||||
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(padding.left - 5, y);
|
|
||||||
ctx.lineTo(padding.left, y);
|
|
||||||
ctx.stroke();
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(padding.left, imageHeight + padding.top);
|
|
||||||
ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
|
|
||||||
ctx.stroke();
|
|
||||||
|
|
||||||
ctx.textAlign = 'center';
|
|
||||||
ctx.textBaseline = 'top';
|
|
||||||
for (let i = 0; i <= 1000; i += 100) {
|
|
||||||
const x = padding.left + (i / 1000) * imageWidth;
|
|
||||||
ctx.fillStyle = '#000000';
|
|
||||||
ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
|
|
||||||
|
|
||||||
ctx.beginPath();
|
|
||||||
ctx.moveTo(x, imageHeight + padding.top);
|
|
||||||
ctx.lineTo(x, imageHeight + padding.top + 5);
|
|
||||||
ctx.stroke();
|
|
||||||
}
|
|
||||||
|
|
||||||
const now = new Date();
|
|
||||||
const timestamp = now
|
|
||||||
.toISOString()
|
|
||||||
.replace(/[-:]/g, '')
|
|
||||||
.replace(/\..+/, '')
|
|
||||||
.replace('T', '_');
|
|
||||||
const outputFilename = `detected_objects_${timestamp}.png`;
|
|
||||||
const outputPath = join(process.cwd(), outputFilename);
|
|
||||||
|
|
||||||
console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
|
|
||||||
const pngBuffer = await canvas.toBuffer('png');
|
|
||||||
console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
|
|
||||||
await writeFile(outputPath, pngBuffer);
|
|
||||||
|
|
||||||
console.log('[detect-object-and-draw] Image saved successfully!');
|
|
||||||
return c.json<TDetectObjectsAndDrawResponse>({
|
|
||||||
outputPath,
|
|
||||||
detectedObjects,
|
|
||||||
});
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Object detection and drawing failed:', error);
|
|
||||||
|
|
||||||
if (error instanceof AppError) {
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
|
|
||||||
message: 'Failed to detect objects and draw',
|
|
||||||
userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
},
|
|
||||||
);
|
const imageBlob = parsed.data.image;
|
||||||
|
const arrayBuffer = await imageBlob.arrayBuffer();
|
||||||
|
const imageBuffer = Buffer.from(arrayBuffer);
|
||||||
|
const metadata = await sharp(imageBuffer).metadata();
|
||||||
|
const imageWidth = metadata.width;
|
||||||
|
const imageHeight = metadata.height;
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[detect-object-and-draw] Original image dimensions: ${imageWidth}x${imageHeight}`,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!imageWidth || !imageHeight) {
|
||||||
|
throw new AppError(AppErrorCode.INVALID_REQUEST, {
|
||||||
|
message: 'Unable to extract image dimensions',
|
||||||
|
userMessage: 'The image file appears to be invalid or corrupted.',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[detect-object-and-draw] Compressing image for Gemini API...');
|
||||||
|
console.log('[detect-object-and-draw] Calling Gemini API for form field detection...');
|
||||||
|
const detectedObjects = await runObjectDetection(imageBuffer);
|
||||||
|
console.log('[detect-object-and-draw] Gemini API call completed');
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[detect-object-and-draw] Detected ${detectedObjects.length} objects, starting to draw...`,
|
||||||
|
);
|
||||||
|
|
||||||
|
const padding = { left: 80, top: 20, right: 20, bottom: 40 };
|
||||||
|
const canvas = new Canvas(
|
||||||
|
imageWidth + padding.left + padding.right,
|
||||||
|
imageHeight + padding.top + padding.bottom,
|
||||||
|
);
|
||||||
|
const ctx = canvas.getContext('2d');
|
||||||
|
|
||||||
|
const img = new Image();
|
||||||
|
img.src = imageBuffer;
|
||||||
|
ctx.drawImage(img, padding.left, padding.top);
|
||||||
|
|
||||||
|
ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
|
||||||
|
ctx.lineWidth = 1;
|
||||||
|
|
||||||
|
for (let i = 0; i <= 1000; i += 100) {
|
||||||
|
const x = padding.left + (i / 1000) * imageWidth;
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(x, padding.top);
|
||||||
|
ctx.lineTo(x, imageHeight + padding.top);
|
||||||
|
ctx.stroke();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Horizontal grid lines (every 100 units on 0-1000 scale)
|
||||||
|
for (let i = 0; i <= 1000; i += 100) {
|
||||||
|
const y = padding.top + (i / 1000) * imageHeight;
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(padding.left, y);
|
||||||
|
ctx.lineTo(imageWidth + padding.left, y);
|
||||||
|
ctx.stroke();
|
||||||
|
}
|
||||||
|
|
||||||
|
const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
|
||||||
|
|
||||||
|
detectedObjects.forEach((obj, index) => {
|
||||||
|
const [ymin, xmin, ymax, xmax] = obj.box_2d.map((coord) => coord / 1000);
|
||||||
|
|
||||||
|
const x = xmin * imageWidth + padding.left;
|
||||||
|
const y = ymin * imageHeight + padding.top;
|
||||||
|
const width = (xmax - xmin) * imageWidth;
|
||||||
|
const height = (ymax - ymin) * imageHeight;
|
||||||
|
|
||||||
|
ctx.strokeStyle = colors[index % colors.length];
|
||||||
|
ctx.lineWidth = 5;
|
||||||
|
ctx.strokeRect(x, y, width, height);
|
||||||
|
|
||||||
|
ctx.fillStyle = colors[index % colors.length];
|
||||||
|
ctx.font = '20px Arial';
|
||||||
|
ctx.fillText(obj.label, x, y - 5);
|
||||||
|
});
|
||||||
|
|
||||||
|
ctx.strokeStyle = '#000000';
|
||||||
|
ctx.lineWidth = 1;
|
||||||
|
ctx.font = '26px Arial';
|
||||||
|
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(padding.left, padding.top);
|
||||||
|
ctx.lineTo(padding.left, imageHeight + padding.top);
|
||||||
|
ctx.stroke();
|
||||||
|
|
||||||
|
ctx.textAlign = 'right';
|
||||||
|
ctx.textBaseline = 'middle';
|
||||||
|
for (let i = 0; i <= 1000; i += 100) {
|
||||||
|
const y = padding.top + (i / 1000) * imageHeight;
|
||||||
|
ctx.fillStyle = '#000000';
|
||||||
|
ctx.fillText(i.toString(), padding.left - 5, y);
|
||||||
|
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(padding.left - 5, y);
|
||||||
|
ctx.lineTo(padding.left, y);
|
||||||
|
ctx.stroke();
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(padding.left, imageHeight + padding.top);
|
||||||
|
ctx.lineTo(imageWidth + padding.left, imageHeight + padding.top);
|
||||||
|
ctx.stroke();
|
||||||
|
|
||||||
|
ctx.textAlign = 'center';
|
||||||
|
ctx.textBaseline = 'top';
|
||||||
|
for (let i = 0; i <= 1000; i += 100) {
|
||||||
|
const x = padding.left + (i / 1000) * imageWidth;
|
||||||
|
ctx.fillStyle = '#000000';
|
||||||
|
ctx.fillText(i.toString(), x, imageHeight + padding.top + 5);
|
||||||
|
|
||||||
|
ctx.beginPath();
|
||||||
|
ctx.moveTo(x, imageHeight + padding.top);
|
||||||
|
ctx.lineTo(x, imageHeight + padding.top + 5);
|
||||||
|
ctx.stroke();
|
||||||
|
}
|
||||||
|
|
||||||
|
const now = new Date();
|
||||||
|
const timestamp = now
|
||||||
|
.toISOString()
|
||||||
|
.replace(/[-:]/g, '')
|
||||||
|
.replace(/\..+/, '')
|
||||||
|
.replace('T', '_');
|
||||||
|
const outputFilename = `detected_objects_${timestamp}.png`;
|
||||||
|
const outputPath = join(process.cwd(), outputFilename);
|
||||||
|
|
||||||
|
console.log('[detect-object-and-draw] Converting canvas to PNG buffer...');
|
||||||
|
const pngBuffer = await canvas.toBuffer('png');
|
||||||
|
console.log(`[detect-object-and-draw] Saving to: ${outputPath}`);
|
||||||
|
await writeFile(outputPath, pngBuffer);
|
||||||
|
|
||||||
|
console.log('[detect-object-and-draw] Image saved successfully!');
|
||||||
|
return c.json<TDetectObjectsResponse>(detectedObjects);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Object detection and drawing failed:', error);
|
||||||
|
|
||||||
|
if (error instanceof AppError) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new AppError(AppErrorCode.UNKNOWN_ERROR, {
|
||||||
|
message: 'Failed to detect objects and draw',
|
||||||
|
userMessage: 'An error occurred while detecting and drawing objects. Please try again.',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|||||||
@ -44,13 +44,7 @@ export type TDetectObjectsRequest = z.infer<typeof ZDetectObjectsRequestSchema>;
|
|||||||
export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;
|
export type TDetectObjectsResponse = z.infer<typeof ZDetectObjectsResponseSchema>;
|
||||||
|
|
||||||
export const ZDetectObjectsAndDrawRequestSchema = z.object({
|
export const ZDetectObjectsAndDrawRequestSchema = z.object({
|
||||||
imagePath: z.string().min(1, 'Image path is required'),
|
image: z.instanceof(Blob, { message: 'Image file is required' }),
|
||||||
});
|
|
||||||
|
|
||||||
export const ZDetectObjectsAndDrawResponseSchema = z.object({
|
|
||||||
outputPath: z.string().describe('Path to the generated image with bounding boxes'),
|
|
||||||
detectedObjects: z.array(ZDetectedObjectSchema).describe('Array of detected objects'),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
|
export type TDetectObjectsAndDrawRequest = z.infer<typeof ZDetectObjectsAndDrawRequestSchema>;
|
||||||
export type TDetectObjectsAndDrawResponse = z.infer<typeof ZDetectObjectsAndDrawResponseSchema>;
|
|
||||||
|
|||||||
110
packages/lib/client-only/utils/page-canvas-registry.ts
Normal file
110
packages/lib/client-only/utils/page-canvas-registry.ts
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
import type Konva from 'konva';
|
||||||
|
|
||||||
|
/**
 * Canvas handles that belong to a single rendered PDF page.
 */
export interface PageCanvasRefs {
  /** 1-indexed number of the page these handles belong to. */
  pageNumber: number;
  /** Canvas element onto which the PDF page itself is rendered. */
  pdfCanvas: HTMLCanvasElement;
  /** Konva stage that holds the field overlays for the page. */
  konvaStage: Konva.Stage;
}
|
||||||
|
|
||||||
|
/**
 * Module-scoped lookup from page number to that page's canvas references,
 * so any component can reach a page's canvases without prop drilling.
 */
const pageCanvasRegistry: Map<number, PageCanvasRefs> = new Map();
|
||||||
|
|
||||||
|
/**
 * Store the canvas references of a page in the registry, keyed by its page
 * number. An entry already stored under the same page number is replaced.
 *
 * Intended to be called once a page renderer has mounted and holds valid
 * canvas refs.
 *
 * @param refs - Canvas references of the page being registered
 */
export const registerPageCanvas = (refs: PageCanvasRefs): void => {
  const { pageNumber } = refs;

  pageCanvasRegistry.set(pageNumber, refs);
};
|
||||||
|
|
||||||
|
/**
 * Drop the registry entry for a page, if one exists.
 *
 * Intended to be called when a page renderer unmounts so the registry does
 * not keep references to canvases that are no longer in use.
 *
 * @param pageNumber - Page number whose entry should be removed
 */
export const unregisterPageCanvas = (pageNumber: number): void => {
  pageCanvasRegistry.delete(pageNumber);
};
|
||||||
|
|
||||||
|
/**
 * Look up the canvas references registered for a page.
 *
 * @param pageNumber - Page number to look up
 * @returns The registered references, or undefined when the page has no entry
 */
export const getPageCanvasRefs = (pageNumber: number): PageCanvasRefs | undefined =>
  pageCanvasRegistry.get(pageNumber);
|
||||||
|
|
||||||
|
/**
 * List the page numbers that currently have an entry in the registry.
 *
 * @returns A new array of the registered page numbers in ascending order
 */
export const getRegisteredPageNumbers = (): number[] => {
  const pageNumbers = [...pageCanvasRegistry.keys()];

  // Numeric comparator: the default sort would compare the numbers as strings.
  pageNumbers.sort((left, right) => left - right);

  return pageNumbers;
};
|
||||||
|
|
||||||
|
/**
 * Composite a PDF page with its field overlays into a single PNG Blob.
 *
 * A temporary canvas sized like the page's PDF canvas is created, the PDF
 * canvas is drawn first (background) and the Konva stage is drawn on top
 * (field overlays).
 *
 * Every failure path resolves to null rather than rejecting: an unregistered
 * page, a missing 2D context, an exception while drawing, or a failed PNG
 * conversion.
 *
 * @param pageNumber - The page number to composite (1-indexed)
 * @returns Promise that resolves to a PNG Blob, or null if the page is not
 *   registered or compositing fails
 */
export const compositePageToBlob = async (pageNumber: number): Promise<Blob | null> => {
  const refs = getPageCanvasRefs(pageNumber);

  if (!refs) {
    console.warn(`Page ${pageNumber} is not registered for canvas capture`);
    return null;
  }

  try {
    // Temporary canvas with the same pixel dimensions as the PDF canvas.
    const tempCanvas = document.createElement('canvas');
    tempCanvas.width = refs.pdfCanvas.width;
    tempCanvas.height = refs.pdfCanvas.height;

    const ctx = tempCanvas.getContext('2d');

    if (!ctx) {
      console.error('Failed to get 2D context for temporary canvas');
      return null;
    }

    // Background layer: the rendered PDF page.
    ctx.drawImage(refs.pdfCanvas, 0, 0);

    // Overlay layer: Konva's toCanvas() returns a new canvas with all layers rendered.
    // NOTE(review): drawn at its natural size; presumably the stage canvas matches the
    // PDF canvas pixel dimensions — confirm against the renderer's pixel ratio.
    const konvaCanvas = refs.konvaStage.toCanvas();
    ctx.drawImage(konvaCanvas, 0, 0);

    // Await the conversion inside the try block so that a failed conversion is
    // handled by the catch below instead of escaping as a rejected promise.
    const blob = await new Promise<Blob | null>((resolve) => {
      tempCanvas.toBlob(resolve, 'image/png');
    });

    if (!blob) {
      throw new Error('Failed to convert canvas to blob');
    }

    return blob;
  } catch (error) {
    console.error(`Error compositing page ${pageNumber}:`, error);
    return null;
  }
};
|
||||||
19
packages/lib/universal/field-renderer/field-constants.ts
Normal file
19
packages/lib/universal/field-renderer/field-constants.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
/**
|
||||||
|
* Shared constants for field dimension enforcement.
|
||||||
|
*
|
||||||
|
* These constants ensure consistency between:
|
||||||
|
* 1. AI prompt (server/api/ai.ts) - instructs Gemini on minimum field dimensions
|
||||||
|
* 2. Client enforcement (envelope-editor-fields-page.tsx) - fallback validation
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum field height in pixels.
|
||||||
|
* Fields smaller than this will be expanded to meet minimum usability requirements.
|
||||||
|
*/
|
||||||
|
export const MIN_FIELD_HEIGHT_PX = 30;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum field width in pixels.
|
||||||
|
* Fields smaller than this will be expanded to meet minimum usability requirements.
|
||||||
|
*/
|
||||||
|
export const MIN_FIELD_WIDTH_PX = 36;
|
||||||
Reference in New Issue
Block a user