diff --git a/apps/remix/app/components/dialogs/document-ai-prompt-dialog.tsx b/apps/remix/app/components/dialogs/recipient-detection-prompt-dialog.tsx similarity index 76% rename from apps/remix/app/components/dialogs/document-ai-prompt-dialog.tsx rename to apps/remix/app/components/dialogs/recipient-detection-prompt-dialog.tsx index 7ad5cab57..9ed0d10d9 100644 --- a/apps/remix/app/components/dialogs/document-ai-prompt-dialog.tsx +++ b/apps/remix/app/components/dialogs/recipient-detection-prompt-dialog.tsx @@ -15,22 +15,22 @@ import { DialogTitle, } from '@documenso/ui/primitives/dialog'; -type DocumentAiStep = 'PROMPT' | 'PROCESSING'; +type RecipientDetectionStep = 'PROMPT' | 'PROCESSING'; -export type DocumentAiPromptDialogProps = { +export type RecipientDetectionPromptDialogProps = { open: boolean; onOpenChange: (open: boolean) => void; onAccept: () => Promise | void; onSkip: () => void; }; -export const DocumentAiPromptDialog = ({ +export const RecipientDetectionPromptDialog = ({ open, onOpenChange, onAccept, onSkip, -}: DocumentAiPromptDialogProps) => { - const [currentStep, setCurrentStep] = useState('PROMPT'); +}: RecipientDetectionPromptDialogProps) => { + const [currentStep, setCurrentStep] = useState('PROMPT'); // Reset to first step when dialog closes useEffect(() => { @@ -39,7 +39,7 @@ export const DocumentAiPromptDialog = ({ } }, [open]); - const handleUseAi = () => { + const handleStartDetection = () => { setCurrentStep('PROCESSING'); Promise.resolve(onAccept()).catch(() => { @@ -61,12 +61,12 @@ export const DocumentAiPromptDialog = ({ <> - Use AI to prepare your document? + Auto-detect recipients? - Would you like to use AI to automatically add recipients to your document? - This can save you time in setting up your document. + Would you like to automatically detect recipients in your document? This can + save you time in setting up your document. @@ -75,8 +75,8 @@ export const DocumentAiPromptDialog = ({ - @@ -90,8 +90,7 @@ export const DocumentAiPromptDialog = ({ - Our AI is scanning your document to detect recipient names, emails, and - signing order. + Scanning your document to detect recipient names, emails, and signing order. diff --git a/apps/remix/app/components/dialogs/document-ai-recipients-dialog.tsx b/apps/remix/app/components/dialogs/suggested-recipients-dialog.tsx similarity index 95% rename from apps/remix/app/components/dialogs/document-ai-recipients-dialog.tsx rename to apps/remix/app/components/dialogs/suggested-recipients-dialog.tsx index b571da6e6..e5bcf5c47 100644 --- a/apps/remix/app/components/dialogs/document-ai-recipients-dialog.tsx +++ b/apps/remix/app/components/dialogs/suggested-recipients-dialog.tsx @@ -34,9 +34,9 @@ import { } from '@documenso/ui/primitives/form/form'; import { FormErrorMessage } from '@documenso/ui/primitives/form/form-error-message'; -import type { RecipientForCreation } from '~/utils/analyze-ai-recipients'; +import type { RecipientForCreation } from '~/utils/detect-document-recipients'; -const ZDocumentAiRecipientSchema = z.object({ +const ZSuggestedRecipientSchema = z.object({ formId: z.string().min(1), name: z .string() @@ -50,15 +50,15 @@ const ZDocumentAiRecipientSchema = z.object({ role: z.nativeEnum(RecipientRole), }); -const ZDocumentAiRecipientsForm = z.object({ +const ZSuggestedRecipientsFormSchema = z.object({ recipients: z - .array(ZDocumentAiRecipientSchema) + .array(ZSuggestedRecipientSchema) .min(1, { message: msg`Please add at least one recipient`.id }), }); -type TDocumentAiRecipientsForm = z.infer; +type TSuggestedRecipientsFormSchema = z.infer; -export type DocumentAiRecipientsDialogProps = { +export type SuggestedRecipientsDialogProps = { open: boolean; recipients: RecipientForCreation[] | null; onOpenChange: (open: boolean) => void; @@ -66,13 +66,13 @@ export type DocumentAiRecipientsDialogProps = { onSubmit: (recipients: RecipientForCreation[]) => Promise | void; }; -export const DocumentAiRecipientsDialog = ({ +export const SuggestedRecipientsDialog = ({ open, recipients, onOpenChange, onCancel, onSubmit, -}: DocumentAiRecipientsDialogProps) => { +}: SuggestedRecipientsDialogProps) => { const { t } = useLingui(); const [recipientSearchQuery, setRecipientSearchQuery] = useState(''); @@ -117,8 +117,8 @@ export const DocumentAiRecipientsDialog = ({ ]; }, [recipients]); - const form = useForm({ - resolver: zodResolver(ZDocumentAiRecipientsForm), + const form = useForm({ + resolver: zodResolver(ZSuggestedRecipientsFormSchema), defaultValues: { recipients: defaultRecipients, }, diff --git a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx index 9329d65f6..d2f7d09ae 100644 --- a/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx +++ b/apps/remix/app/components/general/envelope-editor/envelope-editor-fields-page.tsx @@ -54,30 +54,7 @@ const EnvelopeEditorFieldsPageRenderer = lazy( async () => import('./envelope-editor-fields-page-renderer'), ); -/** - * Enforces minimum field dimensions and centers the field when expanding to meet minimums. - * - * AI often detects form lines as very thin fields (0.2-0.5% height). This function ensures - * fields meet minimum usability requirements by expanding them to at least 30px height and - * 36px width, while keeping them centered on their original position. - * - * @param params - Field dimensions and page size - * @param params.positionX - Field X position as percentage (0-100) - * @param params.positionY - Field Y position as percentage (0-100) - * @param params.width - Field width as percentage (0-100) - * @param params.height - Field height as percentage (0-100) - * @param params.pageWidth - Page width in pixels - * @param params.pageHeight - Page height in pixels - * @returns Adjusted field dimensions with minimums enforced and centered - * - * @example - * // AI detected a thin line: 0.3% height - * const adjusted = enforceMinimumFieldDimensions({ - * positionX: 20, positionY: 50, width: 30, height: 0.3, - * pageWidth: 800, pageHeight: 1100 - * }); - * // Result: height expanded to ~2.7% (30px), centered on original position - */ +// Expands fields to minimum usable dimensions (30px height, 36px width) and centers them const enforceMinimumFieldDimensions = (params: { positionX: number; positionY: number; @@ -94,7 +71,6 @@ const enforceMinimumFieldDimensions = (params: { const MIN_HEIGHT_PX = 30; const MIN_WIDTH_PX = 36; - // Convert percentage to pixels to check against minimums const widthPx = (params.width / 100) * params.pageWidth; const heightPx = (params.height / 100) * params.pageHeight; @@ -136,7 +112,7 @@ const enforceMinimumFieldDimensions = (params: { }; }; -const processAllPagesWithAI = async (params: { +const detectFormFieldsInDocument = async (params: { envelopeId: string; onProgress: (current: number, total: number) => void; }): Promise<{ @@ -148,10 +124,9 @@ const processAllPagesWithAI = async (params: { const errors = new Map(); try { - // Make single API call to process all pages server-side onProgress(0, 1); - const response = await fetch('/api/ai/detect-form-fields', { + const response = await fetch('/api/ai/detect-fields', { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -162,12 +137,11 @@ const processAllPagesWithAI = async (params: { if (!response.ok) { const errorText = await response.text(); - throw new Error(`AI detection failed: ${response.statusText} - ${errorText}`); + throw new Error(`Field detection failed: ${response.statusText} - ${errorText}`); } const detectedFields: TDetectedFormField[] = await response.json(); - // Group fields by page number for (const field of detectedFields) { if (!fieldsPerPage.has(field.pageNumber)) { fieldsPerPage.set(field.pageNumber, []); @@ -177,7 +151,6 @@ const processAllPagesWithAI = async (params: { onProgress(1, 1); } catch (error) { - // If request fails, treat it as error for all pages errors.set(0, error instanceof Error ? error : new Error(String(error))); } @@ -206,7 +179,7 @@ export const EnvelopeEditorFieldsPage = () => { const { t } = useLingui(); const { toast } = useToast(); - const [isAutoAddingFields, setIsAutoAddingFields] = useState(false); + const [isDetectingFields, setIsAutoAddingFields] = useState(false); const [processingProgress, setProcessingProgress] = useState<{ current: number; total: number; @@ -224,14 +197,10 @@ export const EnvelopeEditorFieldsPage = () => { const isMetaSame = isDeepEqual(selectedField.fieldMeta, fieldMeta); - // Todo: Envelopes - Clean up console logs. if (!isMetaSame) { - console.log('TRIGGER UPDATE'); editorFields.updateFieldByFormId(selectedField.formId, { fieldMeta, }); - } else { - console.log('DATA IS SAME, NO UPDATE'); } }; @@ -251,7 +220,7 @@ export const EnvelopeEditorFieldsPage = () => {
{/* Horizontal envelope item selector */} - {isAutoAddingFields && ( + {isDetectingFields && ( <>
@@ -353,7 +322,7 @@ export const EnvelopeEditorFieldsPage = () => { diff --git a/apps/remix/app/components/general/envelope/envelope-drop-zone-wrapper.tsx b/apps/remix/app/components/general/envelope/envelope-drop-zone-wrapper.tsx index 475e7fcee..706bd48e4 100644 --- a/apps/remix/app/components/general/envelope/envelope-drop-zone-wrapper.tsx +++ b/apps/remix/app/components/general/envelope/envelope-drop-zone-wrapper.tsx @@ -1,7 +1,6 @@ import { type ReactNode, useState } from 'react'; -import { useLingui } from '@lingui/react/macro'; -import { Trans } from '@lingui/react/macro'; +import { Trans, useLingui } from '@lingui/react/macro'; import { EnvelopeType } from '@prisma/client'; import { Loader } from 'lucide-react'; import { @@ -27,14 +26,14 @@ import type { TCreateEnvelopePayload } from '@documenso/trpc/server/envelope-rou import { cn } from '@documenso/ui/lib/utils'; import { useToast } from '@documenso/ui/primitives/use-toast'; -import { DocumentAiPromptDialog } from '~/components/dialogs/document-ai-prompt-dialog'; -import { DocumentAiRecipientsDialog } from '~/components/dialogs/document-ai-recipients-dialog'; +import { RecipientDetectionPromptDialog } from '~/components/dialogs/recipient-detection-prompt-dialog'; +import { SuggestedRecipientsDialog } from '~/components/dialogs/suggested-recipients-dialog'; import { useCurrentTeam } from '~/providers/team'; import { type RecipientForCreation, - analyzeRecipientsFromDocument, + detectRecipientsInDocument, ensureRecipientEmails, -} from '~/utils/analyze-ai-recipients'; +} from '~/utils/detect-document-recipients'; export interface EnvelopeDropZoneWrapperProps { children: ReactNode; @@ -59,10 +58,10 @@ export const EnvelopeDropZoneWrapper = ({ const organisation = useCurrentOrganisation(); const [isLoading, setIsLoading] = useState(false); - const [showAiPromptDialog, setShowAiPromptDialog] = useState(false); + const [showRecipientDetectionPrompt, setShowRecipientDetectionPrompt] = useState(false); const [uploadedDocumentId, setUploadedDocumentId] = useState(null); const [pendingRecipients, setPendingRecipients] = useState(null); - const [showAiRecipientsDialog, setShowAiRecipientsDialog] = useState(false); + const [showSuggestedRecipientsDialog, setShowSuggestedRecipientsDialog] = useState(false); const [shouldNavigateAfterPromptClose, setShouldNavigateAfterPromptClose] = useState(true); const userTimezone = @@ -125,9 +124,9 @@ export const EnvelopeDropZoneWrapper = ({ // Show AI prompt dialog for documents setUploadedDocumentId(id); setPendingRecipients(null); - setShowAiRecipientsDialog(false); + setShowSuggestedRecipientsDialog(false); setShouldNavigateAfterPromptClose(true); - setShowAiPromptDialog(true); + setShowRecipientDetectionPrompt(true); } else { // Templates - navigate immediately const pathPrefix = formatTemplatesPath(team.url); @@ -228,13 +227,13 @@ export const EnvelopeDropZoneWrapper = ({ void navigate(`${pathPrefix}/${uploadedDocumentId}/edit`); }; - const handleAiAccept = async () => { + const handleStartRecipientDetection = async () => { if (!uploadedDocumentId) { return; } try { - const recipients = await analyzeRecipientsFromDocument(uploadedDocumentId); + const recipients = await detectRecipientsInDocument(uploadedDocumentId); if (recipients.length === 0) { toast({ @@ -250,14 +249,14 @@ export const EnvelopeDropZoneWrapper = ({ setPendingRecipients(recipientsWithEmails); setShouldNavigateAfterPromptClose(false); - setShowAiPromptDialog(false); - setShowAiRecipientsDialog(true); + setShowRecipientDetectionPrompt(false); + setShowSuggestedRecipientsDialog(true); } catch (error) { if (!(error instanceof Error && error.message === 'NO_RECIPIENTS_DETECTED')) { const parsedError = AppError.parseError(error); toast({ - title: t`Failed to analyze recipients`, + title: t`Failed to detect recipients`, description: parsedError.userMessage || t`You can add recipients manually in the editor`, variant: 'destructive', duration: 7500, @@ -268,14 +267,14 @@ export const EnvelopeDropZoneWrapper = ({ } }; - const handleAiSkip = () => { + const handleSkipRecipientDetection = () => { setShouldNavigateAfterPromptClose(true); - setShowAiPromptDialog(false); + setShowRecipientDetectionPrompt(false); navigateToEnvelopeEditor(); }; const handleRecipientsCancel = () => { - setShowAiRecipientsDialog(false); + setShowSuggestedRecipientsDialog(false); setPendingRecipients(null); navigateToEnvelopeEditor(); }; @@ -297,7 +296,7 @@ export const EnvelopeDropZoneWrapper = ({ duration: 5000, }); - setShowAiRecipientsDialog(false); + setShowSuggestedRecipientsDialog(false); setPendingRecipients(null); navigateToEnvelopeEditor(); } catch (error) { @@ -315,7 +314,7 @@ export const EnvelopeDropZoneWrapper = ({ }; const handlePromptDialogOpenChange = (open: boolean) => { - setShowAiPromptDialog(open); + setShowRecipientDetectionPrompt(open); if (open) { setShouldNavigateAfterPromptClose(true); @@ -394,21 +393,21 @@ export const EnvelopeDropZoneWrapper = ({
)} - - { if (!open) { handleRecipientsCancel(); } else { - setShowAiRecipientsDialog(true); + setShowSuggestedRecipientsDialog(true); } }} onCancel={handleRecipientsCancel} diff --git a/apps/remix/app/components/general/envelope/envelope-upload-button.tsx b/apps/remix/app/components/general/envelope/envelope-upload-button.tsx index 0a4ba7752..1bfcd3576 100644 --- a/apps/remix/app/components/general/envelope/envelope-upload-button.tsx +++ b/apps/remix/app/components/general/envelope/envelope-upload-button.tsx @@ -27,14 +27,14 @@ import { } from '@documenso/ui/primitives/tooltip'; import { useToast } from '@documenso/ui/primitives/use-toast'; -import { DocumentAiPromptDialog } from '~/components/dialogs/document-ai-prompt-dialog'; -import { DocumentAiRecipientsDialog } from '~/components/dialogs/document-ai-recipients-dialog'; +import { RecipientDetectionPromptDialog } from '~/components/dialogs/recipient-detection-prompt-dialog'; +import { SuggestedRecipientsDialog } from '~/components/dialogs/suggested-recipients-dialog'; import { useCurrentTeam } from '~/providers/team'; import { type RecipientForCreation, - analyzeRecipientsFromDocument, + detectRecipientsInDocument, ensureRecipientEmails, -} from '~/utils/analyze-ai-recipients'; +} from '~/utils/detect-document-recipients'; export type EnvelopeUploadButtonProps = { className?: string; @@ -62,10 +62,10 @@ export const EnvelopeUploadButton = ({ className, type, folderId }: EnvelopeUplo const { quota, remaining, refreshLimits, maximumEnvelopeItemCount } = useLimits(); const [isLoading, setIsLoading] = useState(false); - const [showAiPromptDialog, setShowAiPromptDialog] = useState(false); + const [showRecipientDetectionPrompt, setShowAiPromptDialog] = useState(false); const [uploadedDocumentId, setUploadedDocumentId] = useState(null); const [pendingRecipients, setPendingRecipients] = useState(null); - const [showAiRecipientsDialog, setShowAiRecipientsDialog] = useState(false); + const [showSuggestedRecipientsDialog, setShowAiRecipientsDialog] = useState(false); const [shouldNavigateAfterPromptClose, setShouldNavigateAfterPromptClose] = useState(true); const { mutateAsync: createEnvelope } = trpc.envelope.create.useMutation(); @@ -204,13 +204,13 @@ export const EnvelopeUploadButton = ({ className, type, folderId }: EnvelopeUplo void navigate(`${pathPrefix}/${uploadedDocumentId}/edit`); }; - const handleAiAccept = async () => { + const handleStartRecipientDetection = async () => { if (!uploadedDocumentId) { return; } try { - const recipients = await analyzeRecipientsFromDocument(uploadedDocumentId); + const recipients = await detectRecipientsInDocument(uploadedDocumentId); if (recipients.length === 0) { toast({ @@ -244,7 +244,7 @@ export const EnvelopeUploadButton = ({ className, type, folderId }: EnvelopeUplo } }; - const handleAiSkip = () => { + const handleSkipRecipientDetection = () => { setShouldNavigateAfterPromptClose(true); setShowAiPromptDialog(false); navigateToEnvelopeEditor(); @@ -336,15 +336,15 @@ export const EnvelopeUploadButton = ({ className, type, folderId }: EnvelopeUplo - - { if (!open) { diff --git a/apps/remix/app/utils/analyze-ai-recipients.ts b/apps/remix/app/utils/detect-document-recipients.ts similarity index 68% rename from apps/remix/app/utils/analyze-ai-recipients.ts rename to apps/remix/app/utils/detect-document-recipients.ts index 3914f16d9..b4f920cea 100644 --- a/apps/remix/app/utils/analyze-ai-recipients.ts +++ b/apps/remix/app/utils/detect-document-recipients.ts @@ -1,17 +1,19 @@ import { RecipientRole } from '@prisma/client'; -import { AppError } from '@documenso/lib/errors/app-error'; +import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error'; -export type AiRecipient = { +export type SuggestedRecipient = { name: string; email?: string; role: 'SIGNER' | 'APPROVER' | 'CC'; signingOrder?: number; }; -export const analyzeRecipientsFromDocument = async (envelopeId: string): Promise => { +export const detectRecipientsInDocument = async ( + envelopeId: string, +): Promise => { try { - const response = await fetch('/api/ai/analyze-recipients', { + const response = await fetch('/api/ai/detect-recipients', { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -20,10 +22,12 @@ export const analyzeRecipientsFromDocument = async (envelopeId: string): Promise }); if (!response.ok) { - throw new Error('Failed to analyze recipients'); + throw new AppError(AppErrorCode.UNKNOWN_ERROR, { + message: 'Failed to detect recipients', + }); } - return (await response.json()) as AiRecipient[]; + return (await response.json()) as SuggestedRecipient[]; } catch (error) { throw AppError.parseError(error); } @@ -37,7 +41,7 @@ export type RecipientForCreation = { }; export const ensureRecipientEmails = ( - recipients: AiRecipient[], + recipients: SuggestedRecipient[], envelopeId: string, ): RecipientForCreation[] => { const allowedRoles: RecipientRole[] = [ diff --git a/apps/remix/server/api/ai.ts b/apps/remix/server/api/document-analysis/index.ts similarity index 67% rename from apps/remix/server/api/ai.ts rename to apps/remix/server/api/document-analysis/index.ts index a10575143..06921e245 100644 --- a/apps/remix/server/api/ai.ts +++ b/apps/remix/server/api/document-analysis/index.ts @@ -26,17 +26,20 @@ const pdfjsLib = require('pdfjs-dist/legacy/build/pdf.js'); import { mkdir, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; + +import { generateObject } from 'ai'; +import { Hono } from 'hono'; +import sharp from 'sharp'; +import { z } from 'zod'; + import { getSession } from '@documenso/auth/server/lib/utils/get-session'; import { AppError, AppErrorCode } from '@documenso/lib/errors/app-error'; import { getTeamById } from '@documenso/lib/server-only/team/get-team'; import { getFileServerSide } from '@documenso/lib/universal/upload/get-file.server'; import { env } from '@documenso/lib/utils/env'; import { prisma } from '@documenso/prisma'; -import { generateObject } from 'ai'; -import { Hono } from 'hono'; -import sharp from 'sharp'; -import { z } from 'zod'; +import { ANALYZE_RECIPIENTS_PROMPT, DETECT_OBJECTS_PROMPT } from './ai.prompts'; import type { HonoEnv } from '../router'; import { type TAnalyzeRecipientsResponse, @@ -101,89 +104,6 @@ const resizeAndCompressImage = async (imageBuffer: Buffer): Promise => { return await sharp(imageBuffer).jpeg({ quality: 70 }).toBuffer(); }; -const detectObjectsPrompt = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform. - -IMPORTANT RULES: -1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data) -2. Analyze nearby text labels to determine the field type -3. Return bounding boxes for the fillable area only, NOT the label text -4. Each boundingBox must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale - -CRITICAL: UNDERSTANDING FILLABLE AREAS -The "fillable area" is ONLY the empty space where a user will write, type, sign, or check. -- ✓ CORRECT: The blank underscore where someone writes their name: "Name: _________" → box ONLY the underscores -- ✓ CORRECT: The empty white rectangle inside a box outline → box ONLY the empty space, not any printed text -- ✓ CORRECT: The blank space to the right of a label: "Email: [ empty box ]" → box ONLY the empty box, exclude "Email:" -- ✗ INCORRECT: Including the word "Signature:" that appears to the left of a signature line -- ✗ INCORRECT: Including printed labels, instructions, or descriptive text near the field -- ✗ INCORRECT: Extending the box to include text just because it's close to the fillable area - -VISUALIZING THE DISTINCTION: -- If there's text (printed words/labels) near an empty box or line, they are SEPARATE elements -- The text is a LABEL telling the user what to fill -- The empty space is the FILLABLE AREA where they actually write/sign -- Your bounding box should capture ONLY the empty space, even if the label is immediately adjacent - -FIELD TYPES TO DETECT: -• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____' -• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields -• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name' -• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:' -• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____' -• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes -• RADIO - Empty radio button circles (○) in groups, typically circular selection options -• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#' -• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select' -• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain - -DETECTION GUIDELINES: -- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type -- IMPORTANT: Use the nearby text to CLASSIFY the field type, but DO NOT include that text in the bounding box -- If you're uncertain which type fits best, default to TEXT -- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label -- Signature fields are often longer horizontal lines or larger boxes -- Date fields often show format hints or date separators (slashes, dashes) -- Look for visual patterns: underscores (____), horizontal lines, box outlines - -BOUNDING BOX PLACEMENT (CRITICAL): -- Your coordinates must capture ONLY the empty fillable space (the blank area where input goes) -- Once you find the fillable region, LOCK the box to the full boundary of that region (top, bottom, left, right). Do not leave the box floating over just the starting edge. -- If the field is defined by a line or a rectangular border, extend xmin/xmax/ymin/ymax across the entire line/border so the box spans the whole writable area end-to-end. -- EXCLUDE all printed text labels, even if they are: - · Directly to the left of the field (e.g., "Name: _____") - · Directly above the field (e.g., "Signature" printed above a line) - · Very close to the field with minimal spacing - · Inside the same outlined box as the fillable area -- The label text helps you IDENTIFY the field type, but must be EXCLUDED from the bounding box -- If you detect a label "Email:" followed by a blank box, draw the box around ONLY the blank box, not the word "Email:" -- The box should never cover only the leftmost few characters of a long field. For "Signature: ____________", the box must stretch from the first underscore to the last. - -COORDINATE SYSTEM: -- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale -- Top-left corner: ymin and xmin close to 0 -- Bottom-right corner: ymax and xmax close to 1000 -- Coordinates represent positions on a 1000x1000 grid overlaid on the image - -FIELD SIZING STRATEGY FOR LINE-BASED FIELDS: -When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields: -1. Analyze the visual context around the detected line: - - Look at the empty space ABOVE the detected line - - Observe the spacing to any text labels, headers, or other form elements above - - Assess what would be a reasonable field height to make the field clearly visible when filled -2. Expand UPWARD from the detected line to create a usable field: - - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge) - - Extend ymin (top) upward into the available whitespace - - Aim to use 60-80% of the clear whitespace above the line, while being reasonable - - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall) -3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units -4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0 -5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those -6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400: - - Available whitespace: 100 units - - Use 60-80% of that: 60-80 units - - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field) - - This gives comfortable signing space while respecting the form layout`; - type FieldDetectionRecipient = { id: number; name: string | null; @@ -194,7 +114,7 @@ type FieldDetectionRecipient = { const buildFieldDetectionPrompt = (recipients: FieldDetectionRecipient[]) => { if (recipients.length === 0) { - return detectObjectsPrompt; + return DETECT_OBJECTS_PROMPT; } const directory = recipients @@ -214,7 +134,7 @@ const buildFieldDetectionPrompt = (recipients: FieldDetectionRecipient[]) => { }) .join('\n'); - return `${detectObjectsPrompt}\n\nRECIPIENT DIRECTORY:\n${directory}\n\nRECIPIENT ASSIGNMENT RULES:\n1. Every detected field MUST include a "recipientId" taken from the directory above.\n2. Match printed names, role labels ("Buyer", "Seller"), or instructions near the field to the closest recipient.\n3. When the document references numbered signers (Signer 1, Signer 2, etc.), align them with signingOrder when provided.\n4. If a name exactly matches a recipient, always use that recipient's ID.\n5. When context is ambiguous, distribute fields logically across recipients instead of assigning all fields to one person.\n6. Never invent new recipients or IDs—only use those in the directory.`; + return `${DETECT_OBJECTS_PROMPT}\n\nRECIPIENT DIRECTORY:\n${directory}\n\nRECIPIENT ASSIGNMENT RULES:\n1. Every detected field MUST include a "recipientId" taken from the directory above.\n2. Match printed names, role labels ("Buyer", "Seller"), or instructions near the field to the closest recipient.\n3. When the document references numbered signers (Signer 1, Signer 2, etc.), align them with signingOrder when provided.\n4. If a name exactly matches a recipient, always use that recipient's ID.\n5. When context is ambiguous, distribute fields logically across recipients instead of assigning all fields to one person.\n6. Never invent new recipients or IDs—only use those in the directory.`; }; const runFormFieldDetection = async ( @@ -280,6 +200,7 @@ const runFormFieldDetection = async ( }); }; +// Limit recipient detection to first 3 pages for performance and cost efficiency const MAX_PAGES_FOR_RECIPIENT_ANALYSIS = 3; const recipientEmailSchema = z.string().email(); @@ -347,45 +268,8 @@ const authorizeDocumentAccess = async (envelopeId: string, userId: number) => { return documentData; }; -const analyzeRecipientsPrompt = `You are analyzing a document to identify recipients who need to sign, approve, or receive copies. - -TASK: Extract recipient information from this document. - -RECIPIENT TYPES: -- SIGNER: People who must sign the document (look for signature lines, "Signed by:", "Signature:", "X____") -- APPROVER: People who must review/approve before signing (look for "Approved by:", "Reviewed by:", "Approval:") -- CC: People who receive a copy for information only (look for "CC:", "Copy to:", "For information:") - -EXTRACTION RULES: -1. Look for signature lines with names printed above, below, or near them -2. Check for explicit labels like "Name:", "Signer:", "Party:", "Recipient:" -3. Look for "Approved by:", "Reviewed by:", "CC:" sections -4. Extract FULL NAMES as they appear in the document -5. If an email address is visible near a name, include it exactly in the "email" field -6. If NO email is found, leave the email field empty. -7. Assign signing order based on document flow (numbered items, "First signer:", "Second signer:", or top-to-bottom sequence) - -IMPORTANT: -- Only extract recipients explicitly mentioned in the document -- Default role is SIGNER if unclear (signature lines = SIGNER) -- Signing order starts at 1 (first signer = 1, second = 2, etc.) -- If no clear ordering, omit signingOrder -- Return empty array if absolutely no recipients can be detected -- Do NOT invent recipients - only extract what's clearly present - -EXAMPLES: -Good: - - "Signed: _________ John Doe" → { name: "John Doe", role: "SIGNER", signingOrder: 1 } - - "Approved by: Jane Smith (jane@example.com)" → { name: "Jane Smith", email: "jane@example.com", role: "APPROVER" } - - "CC: Legal Team" → { name: "Legal Team", role: "CC" } - -Bad: - - Extracting the document title as a recipient name - - Making up email addresses that aren't in the document - - Adding people not mentioned in the document`; - export const aiRoute = new Hono() - .post('/detect-form-fields', async (c) => { + .post('/detect-fields', async (c) => { try { const { user } = await getSession(c.req.raw); @@ -602,7 +486,7 @@ export const aiRoute = new Hono() }); } }) - .post('/analyze-recipients', async (c) => { + .post('/detect-recipients', async (c) => { try { const { user } = await getSession(c.req.raw); @@ -650,7 +534,7 @@ export const aiRoute = new Hono() }, { type: 'text', - text: analyzeRecipientsPrompt, + text: ANALYZE_RECIPIENTS_PROMPT, }, ], }, diff --git a/apps/remix/server/api/document-analysis/prompts.ts b/apps/remix/server/api/document-analysis/prompts.ts new file mode 100644 index 000000000..38dcf32b6 --- /dev/null +++ b/apps/remix/server/api/document-analysis/prompts.ts @@ -0,0 +1,119 @@ +export const DETECT_OBJECTS_PROMPT = `You are analyzing a form document image to detect fillable fields for the Documenso document signing platform. + +IMPORTANT RULES: +1. Only detect EMPTY/UNFILLED fields (ignore boxes that already contain text or data) +2. Analyze nearby text labels to determine the field type +3. Return bounding boxes for the fillable area only, NOT the label text +4. Each boundingBox must be in the format [ymin, xmin, ymax, xmax] where all coordinates are NORMALIZED to a 0-1000 scale + +CRITICAL: UNDERSTANDING FILLABLE AREAS +The "fillable area" is ONLY the empty space where a user will write, type, sign, or check. +- ✓ CORRECT: The blank underscore where someone writes their name: "Name: _________" → box ONLY the underscores +- ✓ CORRECT: The empty white rectangle inside a box outline → box ONLY the empty space, not any printed text +- ✓ CORRECT: The blank space to the right of a label: "Email: [ empty box ]" → box ONLY the empty box, exclude "Email:" +- ✗ INCORRECT: Including the word "Signature:" that appears to the left of a signature line +- ✗ INCORRECT: Including printed labels, instructions, or descriptive text near the field +- ✗ INCORRECT: Extending the box to include text just because it's close to the fillable area + +VISUALIZING THE DISTINCTION: +- If there's text (printed words/labels) near an empty box or line, they are SEPARATE elements +- The text is a LABEL telling the user what to fill +- The empty space is the FILLABLE AREA where they actually write/sign +- Your bounding box should capture ONLY the empty space, even if the label is immediately adjacent + +FIELD TYPES TO DETECT: +• SIGNATURE - Signature lines, boxes labeled 'Signature', 'Sign here', 'Authorized signature', 'X____' +• INITIALS - Small boxes labeled 'Initials', 'Initial here', typically smaller than signature fields +• NAME - Boxes labeled 'Name', 'Full name', 'Your name', 'Print name', 'Printed name' +• EMAIL - Boxes labeled 'Email', 'Email address', 'E-mail', 'Email:' +• DATE - Boxes labeled 'Date', 'Date signed', "Today's date", or showing date format placeholders like 'MM/DD/YYYY', '__/__/____' +• CHECKBOX - Empty checkbox squares (☐) with or without labels, typically small square boxes +• RADIO - Empty radio button circles (○) in groups, typically circular selection options +• NUMBER - Boxes labeled with numeric context: 'Amount', 'Quantity', 'Phone', 'Phone number', 'ZIP', 'ZIP code', 'Age', 'Price', '#' +• DROPDOWN - Boxes with dropdown indicators (▼, ↓) or labeled 'Select', 'Choose', 'Please select' +• TEXT - Any other empty text input boxes, general input fields, unlabeled boxes, or when field type is uncertain + +DETECTION GUIDELINES: +- Read text located near the box (above, to the left, or inside the box boundary) to infer the field type +- IMPORTANT: Use the nearby text to CLASSIFY the field type, but DO NOT include that text in the bounding box +- If you're uncertain which type fits best, default to TEXT +- For checkboxes and radio buttons: Detect each individual box/circle separately, not the label +- Signature fields are often longer horizontal lines or larger boxes +- Date fields often show format hints or date separators (slashes, dashes) +- Look for visual patterns: underscores (____), horizontal lines, box outlines + +BOUNDING BOX PLACEMENT (CRITICAL): +- Your coordinates must capture ONLY the empty fillable space (the blank area where input goes) +- Once you find the fillable region, LOCK the box to the full boundary of that region (top, bottom, left, right). Do not leave the box floating over just the starting edge. +- If the field is defined by a line or a rectangular border, extend xmin/xmax/ymin/ymax across the entire line/border so the box spans the whole writable area end-to-end. +- EXCLUDE all printed text labels, even if they are: + · Directly to the left of the field (e.g., "Name: _____") + · Directly above the field (e.g., "Signature" printed above a line) + · Very close to the field with minimal spacing + · Inside the same outlined box as the fillable area +- The label text helps you IDENTIFY the field type, but must be EXCLUDED from the bounding box +- If you detect a label "Email:" followed by a blank box, draw the box around ONLY the blank box, not the word "Email:" +- The box should never cover only the leftmost few characters of a long field. For "Signature: ____________", the box must stretch from the first underscore to the last. + +COORDINATE SYSTEM: +- [ymin, xmin, ymax, xmax] normalized to 0-1000 scale +- Top-left corner: ymin and xmin close to 0 +- Bottom-right corner: ymax and xmax close to 1000 +- Coordinates represent positions on a 1000x1000 grid overlaid on the image + +FIELD SIZING STRATEGY FOR LINE-BASED FIELDS: +When detecting thin horizontal lines for SIGNATURE, INITIALS, NAME, EMAIL, DATE, TEXT, or NUMBER fields: +1. Analyze the visual context around the detected line: + - Look at the empty space ABOVE the detected line + - Observe the spacing to any text labels, headers, or other form elements above + - Assess what would be a reasonable field height to make the field clearly visible when filled +2. Expand UPWARD from the detected line to create a usable field: + - Keep ymax (bottom) at the detected line position (the line becomes the bottom edge) + - Extend ymin (top) upward into the available whitespace + - Aim to use 60-80% of the clear whitespace above the line, while being reasonable + - The expanded field should provide comfortable space for signing/writing (minimum 30 units tall) +3. Apply minimum dimensions: height at least 30 units (3% of 1000-scale), width at least 36 units +4. Ensure ymin >= 0 (do not go off-page). If ymin would be negative, clamp to 0 +5. Do NOT apply this expansion to CHECKBOX, RADIO, or DROPDOWN fields - use detected dimensions for those +6. Example: If you detect a signature line at ymax=500 with clear whitespace extending up to y=400: + - Available whitespace: 100 units + - Use 60-80% of that: 60-80 units + - Expanded field: [ymin=420, xmin=200, ymax=500, xmax=600] (creates 80-unit tall field) + - This gives comfortable signing space while respecting the form layout`; + +export const ANALYZE_RECIPIENTS_PROMPT = `You are analyzing a document to identify recipients who need to sign, approve, or receive copies. + +TASK: Extract recipient information from this document. + +RECIPIENT TYPES: +- SIGNER: People who must sign the document (look for signature lines, "Signed by:", "Signature:", "X____") +- APPROVER: People who must review/approve before signing (look for "Approved by:", "Reviewed by:", "Approval:") +- CC: People who receive a copy for information only (look for "CC:", "Copy to:", "For information:") + +EXTRACTION RULES: +1. Look for signature lines with names printed above, below, or near them +2. Check for explicit labels like "Name:", "Signer:", "Party:", "Recipient:" +3. Look for "Approved by:", "Reviewed by:", "CC:" sections +4. Extract FULL NAMES as they appear in the document +5. If an email address is visible near a name, include it exactly in the "email" field +6. If NO email is found, leave the email field empty. +7. Assign signing order based on document flow (numbered items, "First signer:", "Second signer:", or top-to-bottom sequence) + +IMPORTANT: +- Only extract recipients explicitly mentioned in the document +- Default role is SIGNER if unclear (signature lines = SIGNER) +- Signing order starts at 1 (first signer = 1, second = 2, etc.) +- If no clear ordering, omit signingOrder +- Return empty array if absolutely no recipients can be detected +- Do NOT invent recipients - only extract what's clearly present + +EXAMPLES: +Good: + - "Signed: _________ John Doe" → { name: "John Doe", role: "SIGNER", signingOrder: 1 } + - "Approved by: Jane Smith (jane@example.com)" → { name: "Jane Smith", email: "jane@example.com", role: "APPROVER" } + - "CC: Legal Team" → { name: "Legal Team", role: "CC" } + +Bad: + - Extracting the document title as a recipient name + - Making up email addresses that aren't in the document + - Adding people not mentioned in the document`; diff --git a/apps/remix/server/api/ai.types.ts b/apps/remix/server/api/document-analysis/types.ts similarity index 100% rename from apps/remix/server/api/ai.types.ts rename to apps/remix/server/api/document-analysis/types.ts diff --git a/packages/lib/server-only/recipient/create-envelope-recipients.ts b/packages/lib/server-only/recipient/create-envelope-recipients.ts index 15a54431f..344f613c9 100644 --- a/packages/lib/server-only/recipient/create-envelope-recipients.ts +++ b/packages/lib/server-only/recipient/create-envelope-recipients.ts @@ -15,7 +15,7 @@ import type { EnvelopeIdOptions } from '../../utils/envelope'; import { mapRecipientToLegacyRecipient, sanitizeRecipientName } from '../../utils/recipients'; import { getEnvelopeWhereInput } from '../envelope/get-envelope-by-id'; -export interface CreateEnvelopeRecipientsOptions { +export type CreateEnvelopeRecipientsOptions = { userId: number; teamId: number; id: EnvelopeIdOptions; @@ -28,7 +28,7 @@ export interface CreateEnvelopeRecipientsOptions { actionAuth?: TRecipientActionAuthTypes[]; }[]; requestMetadata: ApiRequestMetadata; -} +}; export const createEnvelopeRecipients = async ({ userId, diff --git a/packages/lib/server-only/recipient/set-document-recipients.ts b/packages/lib/server-only/recipient/set-document-recipients.ts index 6d82f91ef..2a357fa5f 100644 --- a/packages/lib/server-only/recipient/set-document-recipients.ts +++ b/packages/lib/server-only/recipient/set-document-recipients.ts @@ -33,13 +33,13 @@ import { renderEmailWithI18N } from '../../utils/render-email-with-i18n'; import { getEmailContext } from '../email/get-email-context'; import { getEnvelopeWhereInput } from '../envelope/get-envelope-by-id'; -export interface SetDocumentRecipientsOptions { +export type SetDocumentRecipientsOptions = { userId: number; teamId: number; id: EnvelopeIdOptions; recipients: RecipientData[]; requestMetadata: ApiRequestMetadata; -} +}; export const setDocumentRecipients = async ({ userId, diff --git a/packages/lib/server-only/recipient/update-envelope-recipients.ts b/packages/lib/server-only/recipient/update-envelope-recipients.ts index acfee9ca9..613c159dc 100644 --- a/packages/lib/server-only/recipient/update-envelope-recipients.ts +++ b/packages/lib/server-only/recipient/update-envelope-recipients.ts @@ -21,7 +21,7 @@ import { mapFieldToLegacyField } from '../../utils/fields'; import { canRecipientBeModified, sanitizeRecipientName } from '../../utils/recipients'; import { getEnvelopeWhereInput } from '../envelope/get-envelope-by-id'; -export interface UpdateEnvelopeRecipientsOptions { +export type UpdateEnvelopeRecipientsOptions = { userId: number; teamId: number; id: EnvelopeIdOptions; @@ -35,7 +35,7 @@ export interface UpdateEnvelopeRecipientsOptions { actionAuth?: TRecipientActionAuthTypes[]; }[]; requestMetadata: ApiRequestMetadata; -} +}; export const updateEnvelopeRecipients = async ({ userId, diff --git a/packages/lib/types/ai.ts b/packages/lib/types/document-analysis.ts similarity index 100% rename from packages/lib/types/ai.ts rename to packages/lib/types/document-analysis.ts