mirror of
https://github.com/documenso/documenso.git
synced 2025-11-13 00:03:33 +10:00
Compare commits
2 Commits
d25565b7d0
...
a08a77e98b
| Author | SHA1 | Date | |
|---|---|---|---|
| a08a77e98b | |||
| 13d9ca7a0e |
@ -25,7 +25,7 @@ type TextPosition = {
|
||||
};
|
||||
|
||||
type CharIndexMapping = {
|
||||
textPosIndex: number;
|
||||
textPositionIndex: number;
|
||||
};
|
||||
|
||||
type PlaceholderInfo = {
|
||||
@ -121,28 +121,26 @@ const parseFieldMeta = (
|
||||
rawFieldMeta is an object with string keys and string values.
|
||||
It contains string values because the PDF parser returns the values as strings.
|
||||
|
||||
E.g. { required: 'true', fontSize: '12', maxValue: '100', minValue: '0', characterLimit: '100' }
|
||||
E.g. { 'required': 'true', 'fontSize': '12', 'maxValue': '100', 'minValue': '0', 'characterLimit': '100' }
|
||||
*/
|
||||
const rawFieldMetaEntries = Object.entries(rawFieldMeta);
|
||||
|
||||
for (const entry of rawFieldMetaEntries) {
|
||||
const [key, value] = entry;
|
||||
|
||||
if (key === 'readOnly' || key === 'required') {
|
||||
parsedFieldMeta[key] = value === 'true';
|
||||
for (const [property, value] of rawFieldMetaEntries) {
|
||||
if (property === 'readOnly' || property === 'required') {
|
||||
parsedFieldMeta[property] = value === 'true';
|
||||
} else if (
|
||||
key === 'fontSize' ||
|
||||
key === 'maxValue' ||
|
||||
key === 'minValue' ||
|
||||
key === 'characterLimit'
|
||||
property === 'fontSize' ||
|
||||
property === 'maxValue' ||
|
||||
property === 'minValue' ||
|
||||
property === 'characterLimit'
|
||||
) {
|
||||
const numValue = Number(value);
|
||||
|
||||
if (!Number.isNaN(numValue)) {
|
||||
parsedFieldMeta[key] = numValue;
|
||||
parsedFieldMeta[property] = numValue;
|
||||
}
|
||||
} else {
|
||||
parsedFieldMeta[key] = value;
|
||||
parsedFieldMeta[property] = value;
|
||||
}
|
||||
}
|
||||
|
||||
@ -168,25 +166,26 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
||||
|
||||
Page dimensions from PDF2JSON are in "page units" (relative coordinates)
|
||||
*/
|
||||
const pageWidth = page.Width;
|
||||
const pageHeight = page.Height;
|
||||
|
||||
let pageText = '';
|
||||
const textPositions: TextPosition[] = [];
|
||||
const charIndexToTextPos: CharIndexMapping[] = [];
|
||||
const charIndexMappings: CharIndexMapping[] = [];
|
||||
|
||||
page.Texts.forEach((text) => {
|
||||
/*
|
||||
R is an array that contains objects with each character.
|
||||
The decodedText contains only the character, without any other information.
|
||||
R is an array of objects containing each character, its position and styling information.
|
||||
The decodedText stores the characters, without any other information.
|
||||
|
||||
textPositions stores each character and its position on the page.
|
||||
*/
|
||||
const decodedText = text.R.map((run) => decodeURIComponent(run.T)).join('');
|
||||
|
||||
/*
|
||||
For each character in the decodedText, we store its position in the textPositions array.
|
||||
This allows us to quickly find the position of a character in the textPositions array by its index.
|
||||
*/
|
||||
for (let i = 0; i < decodedText.length; i++) {
|
||||
charIndexToTextPos.push({
|
||||
textPosIndex: textPositions.length,
|
||||
charIndexMappings.push({
|
||||
textPositionIndex: textPositions.length,
|
||||
});
|
||||
}
|
||||
|
||||
@ -202,13 +201,25 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
||||
|
||||
const placeholderMatches = pageText.matchAll(/{{([^}]+)}}/g);
|
||||
|
||||
/*
|
||||
A placeholder match has the following format:
|
||||
|
||||
[
|
||||
'{{fieldType,recipient,fieldMeta}}',
|
||||
'fieldType,recipient,fieldMeta',
|
||||
'index: <number>',
|
||||
'input: <pdf-text>'
|
||||
]
|
||||
*/
|
||||
for (const placeholderMatch of placeholderMatches) {
|
||||
const placeholder = placeholderMatch[0];
|
||||
const placeholderData = placeholderMatch[1].split(',').map((part) => part.trim());
|
||||
const placeholderData = placeholderMatch[1].split(',').map((property) => property.trim());
|
||||
|
||||
const [fieldTypeString, recipient, ...fieldMetaData] = placeholderData;
|
||||
|
||||
const rawFieldMeta = Object.fromEntries(fieldMetaData.map((meta) => meta.split('=')));
|
||||
const rawFieldMeta = Object.fromEntries(
|
||||
fieldMetaData.map((property) => property.split('=')),
|
||||
);
|
||||
|
||||
const fieldType = parseFieldType(fieldTypeString);
|
||||
const parsedFieldMeta = parseFieldMeta(rawFieldMeta, fieldType);
|
||||
@ -219,51 +230,53 @@ export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<Placehold
|
||||
});
|
||||
|
||||
/*
|
||||
Find the position of where the placeholder starts in the text
|
||||
Find the position of where the placeholder starts and ends in the text.
|
||||
|
||||
Then find the position of where the placeholder ends in the text by adding the length of the placeholder to the index of the placeholder.
|
||||
Then find the position of the characters in the textPositions array.
|
||||
This allows us to quickly find the position of a character in the textPositions array by its index.
|
||||
*/
|
||||
const matchIndex = placeholderMatch.index;
|
||||
if (placeholderMatch.index === undefined) {
|
||||
console.error('Placeholder match index is undefined for placeholder', placeholder);
|
||||
|
||||
if (matchIndex === undefined) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const placeholderLength = placeholder.length;
|
||||
const placeholderEndIndex = matchIndex + placeholderLength;
|
||||
|
||||
const startCharInfo = charIndexToTextPos[matchIndex];
|
||||
const endCharInfo = charIndexToTextPos[placeholderEndIndex - 1];
|
||||
|
||||
if (!startCharInfo || !endCharInfo) {
|
||||
console.error('Could not find text position for placeholder', placeholder);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const startTextPos = textPositions[startCharInfo.textPosIndex];
|
||||
const endTextPos = textPositions[endCharInfo.textPosIndex];
|
||||
const placeholderEndCharIndex = placeholderMatch.index + placeholder.length;
|
||||
|
||||
/*
|
||||
PDF2JSON coordinates - these are in "page units" (relative coordinates)
|
||||
Calculate width as the distance from start to end, plus a portion of the last character's width
|
||||
Use 10% of the last character width to avoid extending too far beyond the placeholder
|
||||
Get the index of the placeholder's first and last character in the textPositions array.
|
||||
Used to retrieve the character information from the textPositions array.
|
||||
|
||||
Example:
|
||||
startTextPosIndex - 1
|
||||
endTextPosIndex - 40
|
||||
*/
|
||||
const x = startTextPos.x;
|
||||
const y = startTextPos.y;
|
||||
const width = endTextPos.x + endTextPos.w * 0.1 - startTextPos.x;
|
||||
const startTextPosIndex = charIndexMappings[placeholderMatch.index].textPositionIndex;
|
||||
const endTextPosIndex = charIndexMappings[placeholderEndCharIndex - 1].textPositionIndex;
|
||||
|
||||
/*
|
||||
Get the placeholder's first and last character information from the textPositions array.
|
||||
|
||||
Example:
|
||||
placeholderStart = { text: '{', x: 100, y: 100, w: 100 }
|
||||
placeholderEnd = { text: '}', x: 200, y: 100, w: 100 }
|
||||
*/
|
||||
const placeholderStart = textPositions[startTextPosIndex];
|
||||
const placeholderEnd = textPositions[endTextPosIndex];
|
||||
|
||||
const width = placeholderEnd.x + placeholderEnd.w * 0.1 - placeholderStart.x;
|
||||
|
||||
placeholders.push({
|
||||
placeholder,
|
||||
recipient,
|
||||
fieldAndMeta,
|
||||
page: pageIndex + 1,
|
||||
x,
|
||||
y,
|
||||
x: placeholderStart.x,
|
||||
y: placeholderStart.y,
|
||||
width,
|
||||
height: 1,
|
||||
pageWidth,
|
||||
pageHeight,
|
||||
pageWidth: page.Width,
|
||||
pageHeight: page.Height,
|
||||
});
|
||||
}
|
||||
});
|
||||
@ -298,12 +311,6 @@ export const replacePlaceholdersInPDF = async (pdf: Buffer): Promise<Buffer> =>
|
||||
- Need to convert from page units to points
|
||||
- Y-axis in pdf-lib is bottom-up (origin at bottom-left)
|
||||
- Y-axis in PDF2JSON is top-down (origin at top-left)
|
||||
|
||||
Conversion formulas:
|
||||
- x_points = (x / pageWidth) * pdfLibPageWidth
|
||||
- y_points = pdfLibPageHeight - ((y / pageHeight) * pdfLibPageHeight)
|
||||
- width_points = (width / pageWidth) * pdfLibPageWidth
|
||||
- height_points = (height / pageHeight) * pdfLibPageHeight
|
||||
*/
|
||||
|
||||
const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
|
||||
@ -419,9 +426,9 @@ export const insertFieldsFromPlaceholdersInPDF = async (
|
||||
},
|
||||
});
|
||||
|
||||
const existingEmails = new Set(existingRecipients.map((r) => r.email.toLowerCase()));
|
||||
const existingEmails = new Set(existingRecipients.map((r) => r.email));
|
||||
const recipientsToCreateFiltered = recipientsToCreate.filter(
|
||||
(r) => !existingEmails.has(r.email.toLowerCase()),
|
||||
(recipient) => !existingEmails.has(recipient.email),
|
||||
);
|
||||
|
||||
let createdRecipients: Pick<Recipient, 'id' | 'email'>[] = existingRecipients;
|
||||
@ -473,8 +480,7 @@ export const insertFieldsFromPlaceholdersInPDF = async (
|
||||
const heightPercent = (placeholder.height / placeholder.pageHeight) * 100;
|
||||
|
||||
const { email } = extractRecipientPlaceholder(placeholder.recipient);
|
||||
const normalizedEmail = email.toLowerCase();
|
||||
const recipient = createdRecipients.find((r) => r.email.toLowerCase() === normalizedEmail);
|
||||
const recipient = createdRecipients.find((r) => r.email === email);
|
||||
|
||||
if (!recipient) {
|
||||
throw new AppError(AppErrorCode.INVALID_BODY, {
|
||||
|
||||
Reference in New Issue
Block a user