mirror of
https://github.com/documenso/documenso.git
synced 2025-11-10 04:22:32 +10:00
refactor: improve variable naming and streamline placeholder extraction logic in PDF processing
This commit is contained in:
@ -166,12 +166,9 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
|||||||
|
|
||||||
Page dimensions from PDF2JSON are in "page units" (relative coordinates)
|
Page dimensions from PDF2JSON are in "page units" (relative coordinates)
|
||||||
*/
|
*/
|
||||||
const pageWidth = page.Width;
|
|
||||||
const pageHeight = page.Height;
|
|
||||||
|
|
||||||
let pageText = '';
|
let pageText = '';
|
||||||
const textPositions: TextPosition[] = [];
|
const textPositions: TextPosition[] = [];
|
||||||
const charIndexToTextPos: CharIndexMapping[] = [];
|
const charIndexMappings: CharIndexMapping[] = [];
|
||||||
|
|
||||||
page.Texts.forEach((text) => {
|
page.Texts.forEach((text) => {
|
||||||
/*
|
/*
|
||||||
@ -187,7 +184,7 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
|||||||
This allows us to quickly find the position of a character in the textPositions array by its index.
|
This allows us to quickly find the position of a character in the textPositions array by its index.
|
||||||
*/
|
*/
|
||||||
for (let i = 0; i < decodedText.length; i++) {
|
for (let i = 0; i < decodedText.length; i++) {
|
||||||
charIndexToTextPos.push({
|
charIndexMappings.push({
|
||||||
textPositionIndex: textPositions.length,
|
textPositionIndex: textPositions.length,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -216,11 +213,13 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
|||||||
*/
|
*/
|
||||||
for (const placeholderMatch of placeholderMatches) {
|
for (const placeholderMatch of placeholderMatches) {
|
||||||
const placeholder = placeholderMatch[0];
|
const placeholder = placeholderMatch[0];
|
||||||
const placeholderData = placeholderMatch[1].split(',').map((part) => part.trim());
|
const placeholderData = placeholderMatch[1].split(',').map((property) => property.trim());
|
||||||
|
|
||||||
const [fieldTypeString, recipient, ...fieldMetaData] = placeholderData;
|
const [fieldTypeString, recipient, ...fieldMetaData] = placeholderData;
|
||||||
|
|
||||||
const rawFieldMeta = Object.fromEntries(fieldMetaData.map((meta) => meta.split('=')));
|
const rawFieldMeta = Object.fromEntries(
|
||||||
|
fieldMetaData.map((property) => property.split('=')),
|
||||||
|
);
|
||||||
|
|
||||||
const fieldType = parseFieldType(fieldTypeString);
|
const fieldType = parseFieldType(fieldTypeString);
|
||||||
const parsedFieldMeta = parseFieldMeta(rawFieldMeta, fieldType);
|
const parsedFieldMeta = parseFieldMeta(rawFieldMeta, fieldType);
|
||||||
@ -231,10 +230,10 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
|||||||
});
|
});
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Find the position of where the placeholder starts in the text
|
Find where the placeholder starts and ends in the text.
|
||||||
|
|
||||||
Then find the position of where the placeholder ends in the text
|
Then find the position of the characters in the textPositions array.
|
||||||
by adding the length of the placeholder to the index of the placeholder.
|
This allows us to quickly find the position of a character in the textPositions array by its index.
|
||||||
*/
|
*/
|
||||||
if (placeholderMatch.index === undefined) {
|
if (placeholderMatch.index === undefined) {
|
||||||
console.error('Placeholder match index is undefined for placeholder', placeholder);
|
console.error('Placeholder match index is undefined for placeholder', placeholder);
|
||||||
@ -242,41 +241,42 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const placeholderLength = placeholder.length;
|
const placeholderEndCharIndex = placeholderMatch.index + placeholder.length;
|
||||||
const placeholderEndIndex = placeholderMatch.index + placeholderLength;
|
|
||||||
|
|
||||||
const startCharacterIndex = charIndexToTextPos[placeholderMatch.index];
|
/*
|
||||||
const endCharacterIndex = charIndexToTextPos[placeholderEndIndex - 1];
|
Get the index of the placeholder's first and last character in the textPositions array.
|
||||||
|
Used to retrieve the character information from the textPositions array.
|
||||||
|
|
||||||
if (!startCharacterIndex || !endCharacterIndex) {
|
Example:
|
||||||
console.error('Could not find text position for placeholder', placeholder);
|
startTextPosIndex = 1
|
||||||
|
endTextPosIndex = 40
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const startTextPos = textPositions[startCharacterIndex.textPositionIndex];
|
|
||||||
const endTextPos = textPositions[endCharacterIndex.textPositionIndex];
|
|
||||||
|
|
||||||
/*
|
|
||||||
PDF2JSON coordinates - these are in "page units" (relative coordinates)
|
|
||||||
Calculate width as the distance from start to end, plus a portion of the last character's width
|
|
||||||
Use 10% of the last character width to avoid extending too far beyond the placeholder
|
|
||||||
*/
|
*/
|
||||||
const x = startTextPos.x;
|
const startTextPosIndex = charIndexMappings[placeholderMatch.index].textPositionIndex;
|
||||||
const y = startTextPos.y;
|
const endTextPosIndex = charIndexMappings[placeholderEndCharIndex - 1].textPositionIndex;
|
||||||
const width = endTextPos.x + endTextPos.w * 0.1 - startTextPos.x;
|
|
||||||
|
/*
|
||||||
|
Get the placeholder's first and last character information from the textPositions array.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
placeholderStart = { text: '{', x: 100, y: 100, w: 100 }
|
||||||
|
placeholderEnd = { text: '}', x: 200, y: 100, w: 100 }
|
||||||
|
*/
|
||||||
|
const placeholderStart = textPositions[startTextPosIndex];
|
||||||
|
const placeholderEnd = textPositions[endTextPosIndex];
|
||||||
|
|
||||||
|
const width = placeholderEnd.x + placeholderEnd.w * 0.1 - placeholderStart.x;
|
||||||
|
|
||||||
placeholders.push({
|
placeholders.push({
|
||||||
placeholder,
|
placeholder,
|
||||||
recipient,
|
recipient,
|
||||||
fieldAndMeta,
|
fieldAndMeta,
|
||||||
page: pageIndex + 1,
|
page: pageIndex + 1,
|
||||||
x,
|
x: placeholderStart.x,
|
||||||
y,
|
y: placeholderStart.y,
|
||||||
width,
|
width,
|
||||||
height: 1,
|
height: 1,
|
||||||
pageWidth,
|
pageWidth: page.Width,
|
||||||
pageHeight,
|
pageHeight: page.Height,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -311,12 +311,6 @@ export const replacePlaceholdersInPDF = async (pdf: Buffer): Promise<Buffer> =>
|
|||||||
- Need to convert from page units to points
|
- Need to convert from page units to points
|
||||||
- Y-axis in pdf-lib is bottom-up (origin at bottom-left)
|
- Y-axis in pdf-lib is bottom-up (origin at bottom-left)
|
||||||
- Y-axis in PDF2JSON is top-down (origin at top-left)
|
- Y-axis in PDF2JSON is top-down (origin at top-left)
|
||||||
|
|
||||||
Conversion formulas:
|
|
||||||
- x_points = (x / pageWidth) * pdfLibPageWidth
|
|
||||||
- y_points = pdfLibPageHeight - ((y / pageHeight) * pdfLibPageHeight)
|
|
||||||
- width_points = (width / pageWidth) * pdfLibPageWidth
|
|
||||||
- height_points = (height / pageHeight) * pdfLibPageHeight
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
|
const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
|
||||||
@ -432,9 +426,9 @@ export const insertFieldsFromPlaceholdersInPDF = async (
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const existingEmails = existingRecipients.map((r) => r.email);
|
const existingEmails = new Set(existingRecipients.map((r) => r.email));
|
||||||
const recipientsToCreateFiltered = recipientsToCreate.filter(
|
const recipientsToCreateFiltered = recipientsToCreate.filter(
|
||||||
(recipient) => !existingEmails.includes(recipient.email),
|
(recipient) => !existingEmails.has(recipient.email),
|
||||||
);
|
);
|
||||||
|
|
||||||
let createdRecipients: Pick<Recipient, 'id' | 'email'>[] = existingRecipients;
|
let createdRecipients: Pick<Recipient, 'id' | 'email'>[] = existingRecipients;
|
||||||
|
|||||||
Reference in New Issue
Block a user