From 3b2e1ada50aa71084971bb4abba6d570bc69999e Mon Sep 17 00:00:00 2001 From: markbruce Date: Tue, 25 Nov 2025 14:52:11 +0000 Subject: [PATCH 1/6] fix: use dynamic count for beginbfrange declaration Fixes garbled text copying in Chrome/Edge for PDFs with >256 unique characters --- lib/font/embedded.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/font/embedded.js b/lib/font/embedded.js index da64efc2..79720686 100644 --- a/lib/font/embedded.js +++ b/lib/font/embedded.js @@ -274,7 +274,7 @@ begincmap 1 begincodespacerange <0000> endcodespacerange -1 beginbfrange +${ranges.length} beginbfrange ${ranges.join('\n')} endbfrange endcmap From 96a2b369469d7393c6b36bf7fbd64fd5dbc312d2 Mon Sep 17 00:00:00 2001 From: markbruce Date: Wed, 26 Nov 2025 11:34:01 +0800 Subject: [PATCH 2/6] Add changelog line Addressed an issue with garbled text copying in Chrome/Edge for PDFs containing more than 256 unique characters. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eadd1cd2..06fcded2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Unreleased +- Fix garbled text copying in Chrome/Edge for PDFs with >256 unique characters (#1659) + ### [v0.17.2] - 2025-08-30 - Fix rendering lists that spans across pages From dda6f4a762acbaa09f5986502c494c97959c7643 Mon Sep 17 00:00:00 2001 From: markbruce Date: Wed, 26 Nov 2025 11:42:39 +0800 Subject: [PATCH 3/6] test: add tests for beginbfrange count declaration Add test cases to verify that the beginbfrange count declaration in ToUnicode CMap matches the actual number of bfrange entries. - Test for fonts with >256 characters (multiple ranges) - Test for fonts with <=256 characters (single range) These tests ensure the fix for the beginbfrange count bug is correct and prevent regression. 
Related to #1659 --- tests/unit/font.spec.js | 154 +++++++++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 51 deletions(-) diff --git a/tests/unit/font.spec.js b/tests/unit/font.spec.js index ae3c8f2c..b99a9740 100644 --- a/tests/unit/font.spec.js +++ b/tests/unit/font.spec.js @@ -7,7 +7,7 @@ describe('EmbeddedFont', () => { const document = new PDFDocument(); const font = PDFFontFactory.open( document, - 'tests/fonts/Roboto-Regular.ttf', + 'tests/fonts/Roboto-Regular.ttf' ); const runSpy = jest.spyOn(font, 'layoutRun'); @@ -23,7 +23,7 @@ describe('EmbeddedFont', () => { const document = new PDFDocument({ fontLayoutCache: false }); const font = PDFFontFactory.open( document, - 'tests/fonts/Roboto-Regular.ttf', + 'tests/fonts/Roboto-Regular.ttf' ); const runSpy = jest.spyOn(font, 'layoutRun'); @@ -42,7 +42,7 @@ describe('EmbeddedFont', () => { document, 'tests/fonts/Roboto-Regular.ttf', undefined, - 'F1099', + 'F1099' ); const dictionary = { end: () => {}, @@ -61,7 +61,7 @@ describe('EmbeddedFont', () => { doc, 'tests/fonts/Roboto-Regular.ttf', undefined, - 'F1099', + 'F1099' ); // 398 different glyphs @@ -80,64 +80,116 @@ describe('EmbeddedFont', () => { const docData = logData(doc); font.toUnicodeCmap(); - const text = docData.map((d) => d.toString('utf8')).join(''); - - let glyphs = 0; - for (const block of text.matchAll( - /beginbfrange\n((?:.|\n)*?)\nendbfrange/g, - )) { - for (const line of block[1].matchAll( - /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gim, - )) { + const text = docData.map((d) => d.toString("utf8")).join(""); + + let glyphs = 0 + for (const block of text.matchAll(/beginbfrange\n((?:.|\n)*?)\nendbfrange/g)) { + for (const line of block[1].matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/igm)) { const low = parseInt(line[1], 16); const high = parseInt(line[2], 16); glyphs += high - low + 1; - expect(high & 0xffffff00).toBe(low & 0xffffff00); + expect(high & 0xFFFFFF00).toBe(low & 0xFFFFFF00); } } expect(glyphs).toBe(398 + 1); }); - 
}); -}); -describe('sizeToPoint', () => { - let doc; - beforeEach(() => { - doc = new PDFDocument({ - font: 'Helvetica', - fontSize: 12, - size: [250, 500], - margin: { top: 10, right: 5, bottom: 10, left: 5 }, + test('beginbfrange count should match actual number of ranges', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099' + ); + + // Generate more than 256 unique characters to trigger multiple bfrange entries + // Each chunk is 256 characters, so we need >256 to get multiple ranges + const chars = []; + + // Add ASCII characters (0-127) + for (let i = 32; i < 127; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add extended Latin characters (128-255) + for (let i = 160; i < 256; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add additional Unicode characters to exceed 256 + const additionalChars = 'ÁÀÂÄÅÃÆÇÐÉÈÊËÍÌÎÏÑÓÒÔÖÕØŒÞÚÙÛÜÝŸáàâäãåæçðéèêëíìîïıñóòôöõøœßþúùûüýÿĀĂĄĆČĎĐĒĖĘĚĞĢĪĮİĶŁĹĻĽŃŅŇŌŐŔŖŘŠŚŞȘŢȚŤŪŮŰŲŽŹŻāăąćčďđēėęěğģīįķłĺļľńņňōőŕŗřšśşșţțťūůűųžźż'; + + const allChars = chars.join('') + additionalChars; + font.encode(allChars); + + const docData = logData(doc); + font.toUnicodeCmap(); + const text = docData.map((d) => d.toString("utf8")).join(""); + + // Extract the count declaration from "N beginbfrange" + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match(/beginbfrange\n((?:.|\n)*?)\nendbfrange/); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + // Match each bfrange line: [entries] + const rangeMatches = bfrangeContent.matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm); + for (const match of rangeMatches) { + actualRangeCount++; + } + } + + // The declared count must match the actual number of 
ranges + expect(declaredCount).toBe(actualRangeCount); + expect(actualRangeCount).toBeGreaterThan(1); // Should have multiple ranges when >256 chars }); - }); - test.each([ - [1, 1], - ['1', 1], - [true, 1], - [false, 0], - ['1em', 12], - ['1in', 72], - ['1px', 0.75], - ['1cm', 28.3465], - ['1mm', 2.8346], - ['1pc', 12], - ['1ex', 11.1], - ['1ch', 6.672], - ['1vw', 2.5], - ['1vh', 5], - ['1vmin', 2.5], - ['1vmax', 5], - ['1%', 0.12], - ['1pt', 1], - ])('%o -> %s', (size, expected) => { - expect(doc.sizeToPoint(size)).toBeCloseTo(expected, 4); - }); + test('beginbfrange count should be 1 for fonts with <=256 characters', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099' + ); - test('1rem -> 12', () => { - doc.fontSize(15); - expect(doc.sizeToPoint('1em')).toEqual(15); - expect(doc.sizeToPoint('1rem')).toEqual(12); + // Generate exactly 256 characters + const chars = []; + for (let i = 0; i < 256; i++) { + chars.push(String.fromCharCode(i + 32)); // Start from space (32) to avoid control chars + } + font.encode(chars.join('')); + + const docData = logData(doc); + font.toUnicodeCmap(); + const text = docData.map((d) => d.toString("utf8")).join(""); + + // Extract the count declaration + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match(/beginbfrange\n((?:.|\n)*?)\nendbfrange/); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + const rangeMatches = bfrangeContent.matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm); + for (const match of rangeMatches) { + actualRangeCount++; + } + } + + // For <=256 characters, should have exactly 1 range + expect(declaredCount).toBe(1); + expect(actualRangeCount).toBe(1); + 
expect(declaredCount).toBe(actualRangeCount); + }); }); }); From ffbd14b92ce4a75ba45e8bf33461ff095c553df9 Mon Sep 17 00:00:00 2001 From: zhang_xiaoning Date: Fri, 28 Nov 2025 16:03:25 +0800 Subject: [PATCH 4/6] Revert "test: add tests for beginbfrange count declaration" This reverts commit dda6f4a762acbaa09f5986502c494c97959c7643. --- tests/unit/font.spec.js | 154 +++++++++++++--------------------------- 1 file changed, 51 insertions(+), 103 deletions(-) diff --git a/tests/unit/font.spec.js b/tests/unit/font.spec.js index b99a9740..ae3c8f2c 100644 --- a/tests/unit/font.spec.js +++ b/tests/unit/font.spec.js @@ -7,7 +7,7 @@ describe('EmbeddedFont', () => { const document = new PDFDocument(); const font = PDFFontFactory.open( document, - 'tests/fonts/Roboto-Regular.ttf' + 'tests/fonts/Roboto-Regular.ttf', ); const runSpy = jest.spyOn(font, 'layoutRun'); @@ -23,7 +23,7 @@ describe('EmbeddedFont', () => { const document = new PDFDocument({ fontLayoutCache: false }); const font = PDFFontFactory.open( document, - 'tests/fonts/Roboto-Regular.ttf' + 'tests/fonts/Roboto-Regular.ttf', ); const runSpy = jest.spyOn(font, 'layoutRun'); @@ -42,7 +42,7 @@ describe('EmbeddedFont', () => { document, 'tests/fonts/Roboto-Regular.ttf', undefined, - 'F1099' + 'F1099', ); const dictionary = { end: () => {}, @@ -61,7 +61,7 @@ describe('EmbeddedFont', () => { doc, 'tests/fonts/Roboto-Regular.ttf', undefined, - 'F1099' + 'F1099', ); // 398 different glyphs @@ -80,116 +80,64 @@ describe('EmbeddedFont', () => { const docData = logData(doc); font.toUnicodeCmap(); - const text = docData.map((d) => d.toString("utf8")).join(""); - - let glyphs = 0 - for (const block of text.matchAll(/beginbfrange\n((?:.|\n)*?)\nendbfrange/g)) { - for (const line of block[1].matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/igm)) { + const text = docData.map((d) => d.toString('utf8')).join(''); + + let glyphs = 0; + for (const block of text.matchAll( + /beginbfrange\n((?:.|\n)*?)\nendbfrange/g, + )) { + for (const 
line of block[1].matchAll( + /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gim, + )) { const low = parseInt(line[1], 16); const high = parseInt(line[2], 16); glyphs += high - low + 1; - expect(high & 0xFFFFFF00).toBe(low & 0xFFFFFF00); + expect(high & 0xffffff00).toBe(low & 0xffffff00); } } expect(glyphs).toBe(398 + 1); }); + }); +}); - test('beginbfrange count should match actual number of ranges', () => { - const doc = new PDFDocument({ compress: false }); - const font = PDFFontFactory.open( - doc, - 'tests/fonts/Roboto-Regular.ttf', - undefined, - 'F1099' - ); - - // Generate more than 256 unique characters to trigger multiple bfrange entries - // Each chunk is 256 characters, so we need >256 to get multiple ranges - const chars = []; - - // Add ASCII characters (0-127) - for (let i = 32; i < 127; i++) { - chars.push(String.fromCharCode(i)); - } - - // Add extended Latin characters (128-255) - for (let i = 160; i < 256; i++) { - chars.push(String.fromCharCode(i)); - } - - // Add additional Unicode characters to exceed 256 - const additionalChars = 'ÁÀÂÄÅÃÆÇÐÉÈÊËÍÌÎÏÑÓÒÔÖÕØŒÞÚÙÛÜÝŸáàâäãåæçðéèêëíìîïıñóòôöõøœßþúùûüýÿĀĂĄĆČĎĐĒĖĘĚĞĢĪĮİĶŁĹĻĽŃŅŇŌŐŔŖŘŠŚŞȘŢȚŤŪŮŰŲŽŹŻāăąćčďđēėęěğģīįķłĺļľńņňōőŕŗřšśşșţțťūůűųžźż'; - - const allChars = chars.join('') + additionalChars; - font.encode(allChars); - - const docData = logData(doc); - font.toUnicodeCmap(); - const text = docData.map((d) => d.toString("utf8")).join(""); - - // Extract the count declaration from "N beginbfrange" - const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); - expect(beginbfrangeMatch).not.toBeNull(); - const declaredCount = parseInt(beginbfrangeMatch[1], 10); - - // Count actual bfrange entries - let actualRangeCount = 0; - const bfrangeBlockMatch = text.match(/beginbfrange\n((?:.|\n)*?)\nendbfrange/); - if (bfrangeBlockMatch) { - const bfrangeContent = bfrangeBlockMatch[1]; - // Match each bfrange line: [entries] - const rangeMatches = bfrangeContent.matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm); - for 
(const match of rangeMatches) { - actualRangeCount++; - } - } - - // The declared count must match the actual number of ranges - expect(declaredCount).toBe(actualRangeCount); - expect(actualRangeCount).toBeGreaterThan(1); // Should have multiple ranges when >256 chars +describe('sizeToPoint', () => { + let doc; + beforeEach(() => { + doc = new PDFDocument({ + font: 'Helvetica', + fontSize: 12, + size: [250, 500], + margin: { top: 10, right: 5, bottom: 10, left: 5 }, }); + }); - test('beginbfrange count should be 1 for fonts with <=256 characters', () => { - const doc = new PDFDocument({ compress: false }); - const font = PDFFontFactory.open( - doc, - 'tests/fonts/Roboto-Regular.ttf', - undefined, - 'F1099' - ); - - // Generate exactly 256 characters - const chars = []; - for (let i = 0; i < 256; i++) { - chars.push(String.fromCharCode(i + 32)); // Start from space (32) to avoid control chars - } - font.encode(chars.join('')); - - const docData = logData(doc); - font.toUnicodeCmap(); - const text = docData.map((d) => d.toString("utf8")).join(""); - - // Extract the count declaration - const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); - expect(beginbfrangeMatch).not.toBeNull(); - const declaredCount = parseInt(beginbfrangeMatch[1], 10); - - // Count actual bfrange entries - let actualRangeCount = 0; - const bfrangeBlockMatch = text.match(/beginbfrange\n((?:.|\n)*?)\nendbfrange/); - if (bfrangeBlockMatch) { - const bfrangeContent = bfrangeBlockMatch[1]; - const rangeMatches = bfrangeContent.matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm); - for (const match of rangeMatches) { - actualRangeCount++; - } - } + test.each([ + [1, 1], + ['1', 1], + [true, 1], + [false, 0], + ['1em', 12], + ['1in', 72], + ['1px', 0.75], + ['1cm', 28.3465], + ['1mm', 2.8346], + ['1pc', 12], + ['1ex', 11.1], + ['1ch', 6.672], + ['1vw', 2.5], + ['1vh', 5], + ['1vmin', 2.5], + ['1vmax', 5], + ['1%', 0.12], + ['1pt', 1], + ])('%o -> %s', (size, expected) => { + 
expect(doc.sizeToPoint(size)).toBeCloseTo(expected, 4); + }); - // For <=256 characters, should have exactly 1 range - expect(declaredCount).toBe(1); - expect(actualRangeCount).toBe(1); - expect(declaredCount).toBe(actualRangeCount); - }); + test('1rem -> 12', () => { + doc.fontSize(15); + expect(doc.sizeToPoint('1em')).toEqual(15); + expect(doc.sizeToPoint('1rem')).toEqual(12); }); }); From b6f65a3665649f417c2820482b4e96117365b378 Mon Sep 17 00:00:00 2001 From: zhang_xiaoning Date: Fri, 28 Nov 2025 16:08:38 +0800 Subject: [PATCH 5/6] Rewrite tests for beginbfrange count declaration. Fix code style issue. --- tests/unit/font.spec.js | 106 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tests/unit/font.spec.js b/tests/unit/font.spec.js index ae3c8f2c..3ba70902 100644 --- a/tests/unit/font.spec.js +++ b/tests/unit/font.spec.js @@ -98,6 +98,112 @@ describe('EmbeddedFont', () => { expect(glyphs).toBe(398 + 1); }); + + test('beginbfrange count should match actual number of ranges', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099', + ); + + // Generate more than 256 unique characters to trigger multiple bfrange entries + // Each chunk is 256 characters, so we need >256 to get multiple ranges + const chars = []; + + // Add ASCII characters (0-127) + for (let i = 32; i < 127; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add extended Latin characters (128-255) + for (let i = 160; i < 256; i++) { + chars.push(String.fromCharCode(i)); + } + + // Add additional Unicode characters to exceed 256 + const additionalChars = + 'ÁÀÂÄÅÃÆÇÐÉÈÊËÍÌÎÏÑÓÒÔÖÕØŒÞÚÙÛÜÝŸáàâäãåæçðéèêëíìîïıñóòôöõøœßþúùûüýÿĀĂĄĆČĎĐĒĖĘĚĞĢĪĮİĶŁĹĻĽŃŅŇŌŐŔŖŘŠŚŞȘŢȚŤŪŮŰŲŽŹŻāăąćčďđēėęěğģīįķłĺļľńņňōőŕŗřšśşșţțťūůűųžźż'; + + const allChars = chars.join('') + additionalChars; + font.encode(allChars); + + const docData = logData(doc); + font.toUnicodeCmap(); + const 
text = docData.map((d) => d.toString('utf8')).join(''); + + // Extract the count declaration from "N beginbfrange" + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match( + /beginbfrange\n((?:.|\n)*?)\nendbfrange/, + ); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + // Match each bfrange line: [entries] + const rangeMatches = bfrangeContent.matchAll( + /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, + ); + for (const match of rangeMatches) { + actualRangeCount++; + } + } + + // The declared count must match the actual number of ranges + expect(declaredCount).toBe(actualRangeCount); + expect(actualRangeCount).toBeGreaterThan(1); // Should have multiple ranges when >256 chars + }); + + test('beginbfrange count should be 1 for fonts with <=256 characters', () => { + const doc = new PDFDocument({ compress: false }); + const font = PDFFontFactory.open( + doc, + 'tests/fonts/Roboto-Regular.ttf', + undefined, + 'F1099', + ); + + // Generate exactly 256 characters + const chars = []; + for (let i = 0; i < 256; i++) { + chars.push(String.fromCharCode(i + 32)); // Start from space (32) to avoid control chars + } + font.encode(chars.join('')); + + const docData = logData(doc); + font.toUnicodeCmap(); + const text = docData.map((d) => d.toString('utf8')).join(''); + + // Extract the count declaration + const beginbfrangeMatch = text.match(/(\d+)\s+beginbfrange/); + expect(beginbfrangeMatch).not.toBeNull(); + const declaredCount = parseInt(beginbfrangeMatch[1], 10); + + // Count actual bfrange entries + let actualRangeCount = 0; + const bfrangeBlockMatch = text.match( + /beginbfrange\n((?:.|\n)*?)\nendbfrange/, + ); + if (bfrangeBlockMatch) { + const bfrangeContent = bfrangeBlockMatch[1]; + const rangeMatches = bfrangeContent.matchAll( + 
/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, + ); + for (const match of rangeMatches) { + actualRangeCount++; + } + } + + // For <=256 characters, should have exactly 1 range + expect(declaredCount).toBe(1); + expect(actualRangeCount).toBe(1); + expect(declaredCount).toBe(actualRangeCount); + }); }); }); From f4ce42ff94f6b49884118ecefd27b1200ba8fa5a Mon Sep 17 00:00:00 2001 From: zhang_xiaoning Date: Sat, 29 Nov 2025 22:35:50 +0800 Subject: [PATCH 6/6] fix(tests): remove unused variables in font.spec.js Replace for loops with unused match variables with spread operator to directly get array length, fixing ESLint no-unused-vars errors. - Replace loop counting with [...rangeMatches].length - Fixes ESLint errors at lines 152 and 197 in tests/unit/font.spec.js - All tests pass successfully --- tests/unit/font.spec.js | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/unit/font.spec.js b/tests/unit/font.spec.js index 3ba70902..c8652ad2 100644 --- a/tests/unit/font.spec.js +++ b/tests/unit/font.spec.js @@ -149,9 +149,7 @@ describe('EmbeddedFont', () => { const rangeMatches = bfrangeContent.matchAll( /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, ); - for (const match of rangeMatches) { - actualRangeCount++; - } + actualRangeCount = [...rangeMatches].length; } // The declared count must match the actual number of ranges @@ -194,9 +192,7 @@ describe('EmbeddedFont', () => { const rangeMatches = bfrangeContent.matchAll( /^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/gm, ); - for (const match of rangeMatches) { - actualRangeCount++; - } + actualRangeCount = [...rangeMatches].length; } // For <=256 characters, should have exactly 1 range