refactor: improve variable naming and streamline placeholder extraction logic in PDF processing

chore: some cleanup
2025-11-13 16:23:06 +10:00 · 2025-11-04 14:18:01 +02:00 · 2025-11-04 11:16:53 +02:00
1 changed file with 67 additions and 61 deletions
--- a/packages/lib/server-only/pdf/auto-place-fields.ts
+++ b/packages/lib/server-only/pdf/auto-place-fields.ts
@ -25,7 +25,7 @@ type TextPosition = {
 };
 type CharIndexMapping = {
-  textPosIndex: number;
+  textPositionIndex: number;
 };
 type PlaceholderInfo = {
@ -121,28 +121,26 @@ const parseFieldMeta = (
    rawFieldMeta is an object with string keys and string values.
    It contains string values because the PDF parser returns the values as strings.
-    E.g. { required: 'true', fontSize: '12', maxValue: '100', minValue: '0', characterLimit: '100' }
+    E.g. { 'required': 'true', 'fontSize': '12', 'maxValue': '100', 'minValue': '0', 'characterLimit': '100' }
  */
  const rawFieldMetaEntries = Object.entries(rawFieldMeta);
-  for (const entry of rawFieldMetaEntries) {
+  for (const [property, value] of rawFieldMetaEntries) {
-    const [key, value] = entry;
+    if (property === 'readOnly' || property === 'required') {
-
+      parsedFieldMeta[property] = value === 'true';
    if (key === 'readOnly' || key === 'required') {
      parsedFieldMeta[key] = value === 'true';
    } else if (
-      key === 'fontSize' ||
+      property === 'fontSize' ||
-      key === 'maxValue' ||
+      property === 'maxValue' ||
-      key === 'minValue' ||
+      property === 'minValue' ||
-      key === 'characterLimit'
+      property === 'characterLimit'
    ) {
      const numValue = Number(value);
      if (!Number.isNaN(numValue)) {
-        parsedFieldMeta[key] = numValue;
+        parsedFieldMeta[property] = numValue;
      }
    } else {
-      parsedFieldMeta[key] = value;
+      parsedFieldMeta[property] = value;
    }
  }
@ -168,25 +166,26 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
          Page dimensions from PDF2JSON are in "page units" (relative coordinates)
        */
        const pageWidth = page.Width;
        const pageHeight = page.Height;
        let pageText = '';
        const textPositions: TextPosition[] = [];
-        const charIndexToTextPos: CharIndexMapping[] = [];
+        const charIndexMappings: CharIndexMapping[] = [];
        page.Texts.forEach((text) => {
          /*
-            R is an array that contains objects with each character.
+            R is an array of objects containing each character, its position and styling information.
-            The decodedText contains only the character, without any other information.
+            The decodedText stores the characters, without any other information.
            textPositions stores each character and its position on the page.
          */
          const decodedText = text.R.map((run) => decodeURIComponent(run.T)).join('');
          /*
            For each character in the decodedText, we record the current index into the textPositions array (textPositions.length).
            This allows us to quickly find a character's entry in the textPositions array by its character index.
          */
          for (let i = 0; i < decodedText.length; i++) {
-            charIndexToTextPos.push({
+            charIndexMappings.push({
-              textPosIndex: textPositions.length,
+              textPositionIndex: textPositions.length,
            });
          }
@ -202,13 +201,25 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
        const placeholderMatches = pageText.matchAll(/{{([^}]+)}}/g);
        /*
          A placeholder match has the following format:
          [
            '{{fieldType,recipient,fieldMeta}}',
            'fieldType,recipient,fieldMeta',
            'index: <number>',
            'input: <pdf-text>'
          ]
        */
        for (const placeholderMatch of placeholderMatches) {
          const placeholder = placeholderMatch[0];
-          const placeholderData = placeholderMatch[1].split(',').map((part) => part.trim());
+          const placeholderData = placeholderMatch[1].split(',').map((property) => property.trim());
          const [fieldTypeString, recipient, ...fieldMetaData] = placeholderData;
-          const rawFieldMeta = Object.fromEntries(fieldMetaData.map((meta) => meta.split('=')));
+          const rawFieldMeta = Object.fromEntries(
            fieldMetaData.map((property) => property.split('=')),
          );
          const fieldType = parseFieldType(fieldTypeString);
          const parsedFieldMeta = parseFieldMeta(rawFieldMeta, fieldType);
@ -219,51 +230,53 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
          });
          /*
-            Find the position of where the placeholder starts in the text
+            Find where the placeholder starts and ends in the text.
-            Then find the position of where the placeholder ends in the text by adding the length of the placeholder to the index of the placeholder.
+            Then find the position of the characters in the textPositions array.
            This allows us to quickly find the position of a character in the textPositions array by its index.
          */
-          const matchIndex = placeholderMatch.index;
+          if (placeholderMatch.index === undefined) {
            console.error('Placeholder match index is undefined for placeholder', placeholder);
          if (matchIndex === undefined) {
            continue;
          }
-          const placeholderLength = placeholder.length;
+          const placeholderEndCharIndex = placeholderMatch.index + placeholder.length;
          const placeholderEndIndex = matchIndex + placeholderLength;
          const startCharInfo = charIndexToTextPos[matchIndex];
          const endCharInfo = charIndexToTextPos[placeholderEndIndex - 1];
          if (!startCharInfo || !endCharInfo) {
            console.error('Could not find text position for placeholder', placeholder);
            return;
          }
          const startTextPos = textPositions[startCharInfo.textPosIndex];
          const endTextPos = textPositions[endCharInfo.textPosIndex];
          /*
-            PDF2JSON coordinates - these are in "page units" (relative coordinates)
+            Get the index of the placeholder's first and last character in the textPositions array.
-            Calculate width as the distance from start to end, plus a portion of the last character's width
+            Used to retrieve the character information from the textPositions array.
-            Use 10% of the last character width to avoid extending too far beyond the placeholder
+
            Example:
              startTextPosIndex - 1
              endTextPosIndex - 40
          */
-          const x = startTextPos.x;
+          const startTextPosIndex = charIndexMappings[placeholderMatch.index].textPositionIndex;
-          const y = startTextPos.y;
+          const endTextPosIndex = charIndexMappings[placeholderEndCharIndex - 1].textPositionIndex;
-          const width = endTextPos.x + endTextPos.w * 0.1 - startTextPos.x;
+
          /*
            Get the placeholder's first and last character information from the textPositions array.
            Example:
              placeholderStart = { text: '{', x: 100, y: 100, w: 100 }
              placeholderEnd = { text: '}', x: 200, y: 100, w: 100 }
          */
          const placeholderStart = textPositions[startTextPosIndex];
          const placeholderEnd = textPositions[endTextPosIndex];
          const width = placeholderEnd.x + placeholderEnd.w * 0.1 - placeholderStart.x;
          placeholders.push({
            placeholder,
            recipient,
            fieldAndMeta,
            page: pageIndex + 1,
-            x,
+            x: placeholderStart.x,
-            y,
+            y: placeholderStart.y,
            width,
            height: 1,
-            pageWidth,
+            pageWidth: page.Width,
-            pageHeight,
+            pageHeight: page.Height,
          });
        }
      });
@ -298,12 +311,6 @@ export const replacePlaceholdersInPDF = async (pdf: Buffer): Promise<Buffer> =>
      - Need to convert from page units to points
      - Y-axis in pdf-lib is bottom-up (origin at bottom-left)
      - Y-axis in PDF2JSON is top-down (origin at top-left)
      Conversion formulas:
      - x_points = (x / pageWidth) * pdfLibPageWidth
      - y_points = pdfLibPageHeight - ((y / pageHeight) * pdfLibPageHeight)
      - width_points = (width / pageWidth) * pdfLibPageWidth
      - height_points = (height / pageHeight) * pdfLibPageHeight
    */
    const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
@ -419,9 +426,9 @@ export const insertFieldsFromPlaceholdersInPDF = async (
    },
  });
-  const existingEmails = new Set(existingRecipients.map((r) => r.email.toLowerCase()));
+  const existingEmails = new Set(existingRecipients.map((r) => r.email));
  const recipientsToCreateFiltered = recipientsToCreate.filter(
-    (r) => !existingEmails.has(r.email.toLowerCase()),
+    (recipient) => !existingEmails.has(recipient.email),
  );
  let createdRecipients: Pick<Recipient, 'id' | 'email'>[] = existingRecipients;
@ -473,8 +480,7 @@ export const insertFieldsFromPlaceholdersInPDF = async (
    const heightPercent = (placeholder.height / placeholder.pageHeight) * 100;
    const { email } = extractRecipientPlaceholder(placeholder.recipient);
-    const normalizedEmail = email.toLowerCase();
+    const recipient = createdRecipients.find((r) => r.email === email);
    const recipient = createdRecipients.find((r) => r.email.toLowerCase() === normalizedEmail);
    if (!recipient) {
      throw new AppError(AppErrorCode.INVALID_BODY, {
Author	SHA1	Message	Date
Catalin Pit	a08a77e98b	refactor: improve variable naming and streamline placeholder extraction logic in PDF processing	2025-11-04 14:18:01 +02:00
Catalin Pit	13d9ca7a0e	chore: some cleanup	2025-11-04 11:16:53 +02:00