mirror of
https://github.com/documenso/documenso.git
synced 2025-11-26 22:44:41 +10:00
Compare commits
2 Commits
v2.0.9
...
1780a5c262
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1780a5c262 | ||
|
|
cb9bf407f7 |
13
package-lock.json
generated
13
package-lock.json
generated
@@ -19,6 +19,7 @@
|
|||||||
"inngest-cli": "^0.29.1",
|
"inngest-cli": "^0.29.1",
|
||||||
"luxon": "^3.5.0",
|
"luxon": "^3.5.0",
|
||||||
"mupdf": "^1.0.0",
|
"mupdf": "^1.0.0",
|
||||||
|
"pdf2json": "^4.0.0",
|
||||||
"react": "^18",
|
"react": "^18",
|
||||||
"typescript": "5.6.2",
|
"typescript": "5.6.2",
|
||||||
"zod": "3.24.1"
|
"zod": "3.24.1"
|
||||||
@@ -27198,6 +27199,18 @@
|
|||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/pdf2json": {
|
||||||
|
"version": "4.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-4.0.0.tgz",
|
||||||
|
"integrity": "sha512-WkezNsLK8sGpuFC7+PPP0DsXROwdoOxmXPBTtUWWkCwCi/Vi97MRC52Ly6FWIJjOKIywpm/L2oaUgSrmtU+7ZQ==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bin": {
|
||||||
|
"pdf2json": "bin/pdf2json.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.18.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/pdfjs-dist": {
|
"node_modules/pdfjs-dist": {
|
||||||
"version": "3.11.174",
|
"version": "3.11.174",
|
||||||
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz",
|
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz",
|
||||||
|
|||||||
@@ -74,6 +74,7 @@
|
|||||||
"inngest-cli": "^0.29.1",
|
"inngest-cli": "^0.29.1",
|
||||||
"luxon": "^3.5.0",
|
"luxon": "^3.5.0",
|
||||||
"mupdf": "^1.0.0",
|
"mupdf": "^1.0.0",
|
||||||
|
"pdf2json": "^4.0.0",
|
||||||
"react": "^18",
|
"react": "^18",
|
||||||
"typescript": "5.6.2",
|
"typescript": "5.6.2",
|
||||||
"zod": "3.24.1"
|
"zod": "3.24.1"
|
||||||
|
|||||||
202
packages/lib/server-only/pdf/auto-place-fields.ts
Normal file
202
packages/lib/server-only/pdf/auto-place-fields.ts
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
import { PDFDocument, rgb } from '@cantoo/pdf-lib';
|
||||||
|
import PDFParser from 'pdf2json';
|
||||||
|
|
||||||
|
import { getPageSize } from './get-page-size';
|
||||||
|
|
||||||
|
type TextPosition = {
|
||||||
|
text: string;
|
||||||
|
x: number;
|
||||||
|
y: number;
|
||||||
|
w: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type CharIndexMapping = {
|
||||||
|
textPosIndex: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type PlaceholderInfo = {
|
||||||
|
placeholder: string;
|
||||||
|
fieldType: string;
|
||||||
|
recipient: string;
|
||||||
|
isRequired: string;
|
||||||
|
page: number;
|
||||||
|
// PDF2JSON coordinates (in page units - these are relative to page dimensions)
|
||||||
|
x: number;
|
||||||
|
y: number;
|
||||||
|
width: number;
|
||||||
|
height: number;
|
||||||
|
// Page dimensions from PDF2JSON (in page units)
|
||||||
|
pageWidth: number;
|
||||||
|
pageHeight: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
Questions for later:
|
||||||
|
- Does it handle multi-page PDFs?
|
||||||
|
- What happens with incorrect placeholders? E.g. those containing non-accepted properties.
|
||||||
|
- The placeholder data is dynamic. How to handle this parsing? Perhaps we need to do it similar to the fieldMeta parsing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export const extractPlaceholdersFromPDF = async (pdf: Buffer): Promise<PlaceholderInfo[]> => {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const parser = new PDFParser(null, true);
|
||||||
|
|
||||||
|
parser.on('pdfParser_dataError', (errData) => {
|
||||||
|
reject(errData);
|
||||||
|
});
|
||||||
|
|
||||||
|
parser.on('pdfParser_dataReady', (pdfData) => {
|
||||||
|
const placeholders: PlaceholderInfo[] = [];
|
||||||
|
|
||||||
|
pdfData.Pages.forEach((page, pageIndex) => {
|
||||||
|
/*
|
||||||
|
pdf2json returns the PDF page content as an array of characters.
|
||||||
|
We need to concatenate the characters to get the full text.
|
||||||
|
We also need to get the position of the text so we can place the placeholders in the correct position.
|
||||||
|
|
||||||
|
Page dimensions from PDF2JSON are in "page units" (relative coordinates)
|
||||||
|
*/
|
||||||
|
const pageWidth = page.Width;
|
||||||
|
const pageHeight = page.Height;
|
||||||
|
|
||||||
|
let pageText = '';
|
||||||
|
const textPositions: TextPosition[] = [];
|
||||||
|
const charIndexToTextPos: CharIndexMapping[] = [];
|
||||||
|
|
||||||
|
page.Texts.forEach((text) => {
|
||||||
|
/*
|
||||||
|
R is an array that contains objects with each character.
|
||||||
|
The decodedText contains only the character, without any other information.
|
||||||
|
|
||||||
|
textPositions stores each character and its position on the page.
|
||||||
|
*/
|
||||||
|
const decodedText = text.R.map((run) => decodeURIComponent(run.T)).join('');
|
||||||
|
|
||||||
|
for (let i = 0; i < decodedText.length; i++) {
|
||||||
|
charIndexToTextPos.push({
|
||||||
|
textPosIndex: textPositions.length,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pageText += decodedText;
|
||||||
|
|
||||||
|
textPositions.push({
|
||||||
|
text: decodedText,
|
||||||
|
x: text.x,
|
||||||
|
y: text.y,
|
||||||
|
w: text.w || 0,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
const placeholderMatches = pageText.matchAll(/{{([^}]+)}}/g);
|
||||||
|
|
||||||
|
for (const match of placeholderMatches) {
|
||||||
|
const placeholder = match[0];
|
||||||
|
const placeholderData = match[1].split(',').map((part) => part.trim());
|
||||||
|
|
||||||
|
const [fieldType, recipient, isRequired] = placeholderData;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Find the position of where the placeholder starts in the text
|
||||||
|
|
||||||
|
Then find the position of where the placeholder ends in the text by adding the length of the placeholder to the index of the placeholder.
|
||||||
|
*/
|
||||||
|
const matchIndex = match.index;
|
||||||
|
const placeholderLength = placeholder.length;
|
||||||
|
const placeholderEndIndex = matchIndex + placeholderLength;
|
||||||
|
|
||||||
|
const startCharInfo = charIndexToTextPos[matchIndex];
|
||||||
|
const endCharInfo = charIndexToTextPos[placeholderEndIndex - 1];
|
||||||
|
|
||||||
|
if (!startCharInfo || !endCharInfo) {
|
||||||
|
console.error('Could not find text position for placeholder', placeholder);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const startTextPos = textPositions[startCharInfo.textPosIndex];
|
||||||
|
const endTextPos = textPositions[endCharInfo.textPosIndex];
|
||||||
|
|
||||||
|
/*
|
||||||
|
PDF2JSON coordinates - these are in "page units" (relative coordinates)
|
||||||
|
Calculate width as the distance from start to end, plus a portion of the last character's width
|
||||||
|
Use 10% of the last character width to avoid extending too far beyond the placeholder
|
||||||
|
*/
|
||||||
|
const x = startTextPos.x;
|
||||||
|
const y = startTextPos.y;
|
||||||
|
const width = endTextPos.x + endTextPos.w * 0.1 - startTextPos.x;
|
||||||
|
|
||||||
|
placeholders.push({
|
||||||
|
placeholder,
|
||||||
|
fieldType,
|
||||||
|
recipient,
|
||||||
|
isRequired,
|
||||||
|
page: pageIndex + 1,
|
||||||
|
x,
|
||||||
|
y,
|
||||||
|
width,
|
||||||
|
height: 1,
|
||||||
|
pageWidth,
|
||||||
|
pageHeight,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
resolve(placeholders);
|
||||||
|
});
|
||||||
|
|
||||||
|
parser.parseBuffer(pdf);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
export const replacePlaceholdersInPDF = async (pdf: Buffer): Promise<Buffer> => {
|
||||||
|
const placeholders = await extractPlaceholdersFromPDF(pdf);
|
||||||
|
|
||||||
|
const pdfDoc = await PDFDocument.load(new Uint8Array(pdf));
|
||||||
|
const pages = pdfDoc.getPages();
|
||||||
|
|
||||||
|
for (const placeholder of placeholders) {
|
||||||
|
const pageIndex = placeholder.page - 1;
|
||||||
|
const page = pages[pageIndex];
|
||||||
|
|
||||||
|
const { width: pdfLibPageWidth, height: pdfLibPageHeight } = getPageSize(page);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Convert PDF2JSON coordinates to pdf-lib coordinates:
|
||||||
|
|
||||||
|
PDF2JSON uses relative "page units":
|
||||||
|
- x, y, width, height are in page units
|
||||||
|
- Page dimensions (Width, Height) are also in page units
|
||||||
|
|
||||||
|
pdf-lib uses absolute points (1 point = 1/72 inch):
|
||||||
|
- Need to convert from page units to points
|
||||||
|
- Y-axis in pdf-lib is bottom-up (origin at bottom-left)
|
||||||
|
- Y-axis in PDF2JSON is top-down (origin at top-left)
|
||||||
|
|
||||||
|
Conversion formulas:
|
||||||
|
- x_points = (x / pageWidth) * pdfLibPageWidth
|
||||||
|
- y_points = pdfLibPageHeight - ((y / pageHeight) * pdfLibPageHeight)
|
||||||
|
- width_points = (width / pageWidth) * pdfLibPageWidth
|
||||||
|
- height_points = (height / pageHeight) * pdfLibPageHeight
|
||||||
|
*/
|
||||||
|
|
||||||
|
const xPoints = (placeholder.x / placeholder.pageWidth) * pdfLibPageWidth;
|
||||||
|
const yPoints = pdfLibPageHeight - (placeholder.y / placeholder.pageHeight) * pdfLibPageHeight;
|
||||||
|
const widthPoints = (placeholder.width / placeholder.pageWidth) * pdfLibPageWidth;
|
||||||
|
const heightPoints = (placeholder.height / placeholder.pageHeight) * pdfLibPageHeight;
|
||||||
|
|
||||||
|
page.drawRectangle({
|
||||||
|
x: xPoints,
|
||||||
|
y: yPoints - heightPoints, // Adjust for height since y is at baseline
|
||||||
|
width: widthPoints,
|
||||||
|
height: heightPoints,
|
||||||
|
color: rgb(1, 1, 1),
|
||||||
|
borderColor: rgb(1, 1, 1),
|
||||||
|
borderWidth: 2,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const modifiedPdfBytes = await pdfDoc.save();
|
||||||
|
|
||||||
|
return Buffer.from(modifiedPdfBytes);
|
||||||
|
};
|
||||||
Reference in New Issue
Block a user