Compare commits

...

2 Commits

View File

@ -25,7 +25,7 @@ type TextPosition = {
};
type CharIndexMapping = {
textPosIndex: number;
textPositionIndex: number;
};
type PlaceholderInfo = {
@ -121,28 +121,26 @@ const parseFieldMeta = (
rawFieldMeta is an object with string keys and string values.
It contains string values because the PDF parser returns the values as strings.
E.g. { required: 'true', fontSize: '12', maxValue: '100', minValue: '0', characterLimit: '100' }
E.g. { 'required': 'true', 'fontSize': '12', 'maxValue': '100', 'minValue': '0', 'characterLimit': '100' }
*/
const rawFieldMetaEntries = Object.entries(rawFieldMeta);
for (const entry of rawFieldMetaEntries) {
const [key, value] = entry;
if (key === 'readOnly' || key === 'required') {
parsedFieldMeta[key] = value === 'true';
for (const [property, value] of rawFieldMetaEntries) {
if (property === 'readOnly' || property === 'required') {
parsedFieldMeta[property] = value === 'true';
} else if (
key === 'fontSize' ||
key === 'maxValue' ||
key === 'minValue' ||
key === 'characterLimit'
property === 'fontSize' ||
property === 'maxValue' ||
property === 'minValue' ||
property === 'characterLimit'
) {
const numValue = Number(value);
if (!Number.isNaN(numValue)) {
parsedFieldMeta[key] = numValue;
parsedFieldMeta[property] = numValue;
}
} else {
parsedFieldMeta[key] = value;
parsedFieldMeta[property] = value;
}
}
@ -168,25 +166,26 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
Page dimensions from PDF2JSON are in "page units" (relative coordinates)
*/
const pageWidth = page.Width;
const pageHeight = page.Height;
let pageText = '';
const textPositions: TextPosition[] = [];
const charIndexToTextPos: CharIndexMapping[] = [];
const charIndexMappings: CharIndexMapping[] = [];
page.Texts.forEach((text) => {
/*
R is an array that contains objects with each character.
The decodedText contains only the character, without any other information.
R is an array of objects containing each character, its position and styling information.
The decodedText stores the characters, without any other information.
textPositions stores each character and its position on the page.
*/
const decodedText = text.R.map((run) => decodeURIComponent(run.T)).join('');
/*
For each character in the decodedText, we store the index of its corresponding entry in the textPositions array.
This allows us to quickly look up a character's position in the textPositions array from its index in the page text.
*/
for (let i = 0; i < decodedText.length; i++) {
charIndexToTextPos.push({
textPosIndex: textPositions.length,
charIndexMappings.push({
textPositionIndex: textPositions.length,
});
}
@ -202,13 +201,25 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
const placeholderMatches = pageText.matchAll(/{{([^}]+)}}/g);
/*
A placeholder match has the following format:
[
'{{fieldType,recipient,fieldMeta}}',
'fieldType,recipient,fieldMeta',
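index: <number>, (property on the match: start index of the match in the page text)
input: <pdf-text> (property on the match: the full text that was searched)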
]
*/
for (const placeholderMatch of placeholderMatches) {
const placeholder = placeholderMatch[0];
const placeholderData = placeholderMatch[1].split(',').map((part) => part.trim());
const placeholderData = placeholderMatch[1].split(',').map((property) => property.trim());
const [fieldTypeString, recipient, ...fieldMetaData] = placeholderData;
const rawFieldMeta = Object.fromEntries(fieldMetaData.map((meta) => meta.split('=')));
const rawFieldMeta = Object.fromEntries(
fieldMetaData.map((property) => property.split('=')),
);
const fieldType = parseFieldType(fieldTypeString);
const parsedFieldMeta = parseFieldMeta(rawFieldMeta, fieldType);
@ -219,51 +230,53 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
});
/*
Find the position of where the placeholder starts in the text
Find where the placeholder starts and ends in the page text.
The start is the index of the match; the end is the start plus the length of the placeholder.
Then look up the corresponding entries in the textPositions array.
This gives us the on-page position of the placeholder's first and last characters.
*/
const matchIndex = placeholderMatch.index;
if (placeholderMatch.index === undefined) {
console.error('Placeholder match index is undefined for placeholder', placeholder);
if (matchIndex === undefined) {
continue;
}
const placeholderLength = placeholder.length;
const placeholderEndIndex = matchIndex + placeholderLength;
const startCharInfo = charIndexToTextPos[matchIndex];
const endCharInfo = charIndexToTextPos[placeholderEndIndex - 1];
if (!startCharInfo || !endCharInfo) {
console.error('Could not find text position for placeholder', placeholder);
return;
}
const startTextPos = textPositions[startCharInfo.textPosIndex];
const endTextPos = textPositions[endCharInfo.textPosIndex];
const placeholderEndCharIndex = placeholderMatch.index + placeholder.length;
/*
PDF2JSON coordinates - these are in "page units" (relative coordinates)
Calculate width as the distance from start to end, plus a portion of the last character's width
Use 10% of the last character width to avoid extending too far beyond the placeholder
Get the index of the placeholder's first and last character in the textPositions array.
These indexes are used to retrieve the character information from the textPositions array.
Example:
startTextPosIndex = 1
endTextPosIndex = 40
*/
const x = startTextPos.x;
const y = startTextPos.y;
const width = endTextPos.x + endTextPos.w * 0.1 - startTextPos.x;
const startTextPosIndex = charIndexMappings[placeholderMatch.index].textPositionIndex;
const endTextPosIndex = charIndexMappings[placeholderEndCharIndex - 1].textPositionIndex;
/*
Get the placeholder's first and last character information from the textPositions array.
Example:
placeholderStart = { text: '{', x: 100, y: 100, w: 100 }
placeholderEnd = { text: '}', x: 200, y: 100, w: 100 }
*/
const placeholderStart = textPositions[startTextPosIndex];
const placeholderEnd = textPositions[endTextPosIndex];
const width = placeholderEnd.x + placeholderEnd.w * 0.1 - placeholderStart.x;
placeholders.push({
placeholder,
recipient,
fieldAndMeta,
page: pageIndex + 1,
x,
y,
x: placeholderStart.x,
y: placeholderStart.y,
width,
height: 1,
pageWidth,
pageHeight,
pageWidth: page.Width,
pageHeight: page.Height,
});
}
});
@ -298,12 +311,6 @@ export const replacePlaceholdersInPDF = async (pdf: Buffer): Promise<Buffer> =>
- Need to convert from page units to points
- Y-axis in pdf-lib is bottom-up (origin at bottom-left)
- Y-axis in PDF2JSON is top-down (origin at top-left)
Conversion formulas:
- x_points = (x / pageWidth) * pdfLibPageWidth
- y_points = pdfLibPageHeight - ((y / pageHeight) * pdfLibPageHeight)
- width_points = (width / pageWidth) * pdfLibPageWidth
- height_points = (height / pageHeight) * pdfLibPageHeight
*/
const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
@ -419,9 +426,9 @@ export const insertFieldsFromPlaceholdersInPDF = async (
},
});
const existingEmails = new Set(existingRecipients.map((r) => r.email.toLowerCase()));
const existingEmails = new Set(existingRecipients.map((r) => r.email));
const recipientsToCreateFiltered = recipientsToCreate.filter(
(r) => !existingEmails.has(r.email.toLowerCase()),
(recipient) => !existingEmails.has(recipient.email),
);
let createdRecipients: Pick<Recipient, 'id' | 'email'>[] = existingRecipients;
@ -473,8 +480,7 @@ export const insertFieldsFromPlaceholdersInPDF = async (
const heightPercent = (placeholder.height / placeholder.pageHeight) * 100;
const { email } = extractRecipientPlaceholder(placeholder.recipient);
const normalizedEmail = email.toLowerCase();
const recipient = createdRecipients.find((r) => r.email.toLowerCase() === normalizedEmail);
const recipient = createdRecipients.find((r) => r.email === email);
if (!recipient) {
throw new AppError(AppErrorCode.INVALID_BODY, {