Search for text in a PDF using JavaScript

This guide shows how to search for text in a PDF using a regular expression and then apply a link annotation to the highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied instead, such as a redaction annotation in a search-and-redact workflow.

JavaScript

/**
 * Searches a PDF for the first credit-card-style number (dddd-dddd-dddd-dddd)
 * and places a URI link annotation over every highlighted quad of that match.
 *
 * `filename` is not declared in this snippet; it must be supplied by the
 * enclosing scope as the URL of the document to open.
 *
 * @returns {Promise<void>} Resolves once the annotations (if any) are added.
 */
async function main() {
  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const txtSearch = await PDFNet.TextSearch.create();
  const { Mode, ResultCode } = PDFNet.TextSearch;

  // Whole-word regular-expression search that stops at page boundaries and
  // records highlight information for each match.
  const mode = Mode.e_whole_word | Mode.e_page_stop | Mode.e_reg_expression | Mode.e_highlight;

  // Regular expression matching a credit card number.
  const pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}'; // or "(\\d{4}-){3}\\d{4}"

  // begin() initializes the search with the document, pattern and mode, so no
  // separate setMode()/setPattern() calls are needed beforehand.
  await txtSearch.begin(doc, pattern, mode);

  // Because e_page_stop is set, run() may report e_page (end of a page) before
  // any match is reached. Keep stepping until a match is found or the search
  // reports anything else (e.g. e_done).
  let result = await txtSearch.run();
  while (result.code === ResultCode.e_page) {
    result = await txtSearch.run();
  }
  if (result.code !== ResultCode.e_found) {
    return;
  }

  // Add a link annotation based on the location of the found instance.
  const hlts = result.highlights;
  await hlts.begin(doc);
  while (await hlts.hasNext()) {
    const curPage = await doc.getPage(await hlts.getCurrentPageNumber());
    const quadArr = await hlts.getCurrentQuads();
    for (const quad of quadArr) {
      // Axis-aligned bounding box of the quad's four corner points.
      const x1 = Math.min(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const x2 = Math.max(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const y1 = Math.min(quad.p1y, quad.p2y, quad.p3y, quad.p4y);
      const y2 = Math.max(quad.p1y, quad.p2y, quad.p3y, quad.p4y);

      const hyperLink = await PDFNet.LinkAnnot.create(doc, await PDFNet.Rect.init(x1, y1, x2, y2));
      await hyperLink.setAction(await PDFNet.Action.createURI(doc, 'http://www.apryse.com'));
      await curPage.annotPushBack(hyperLink);
    }
    // Must be awaited: otherwise hasNext() can run before the iterator advances.
    await hlts.next();
  }
}
// Entry point: hand main to PDFNet.runWithCleanup to execute.
// NOTE(review): Apryse documents an optional license-key second argument — confirm whether one is required here.
PDFNet.runWithCleanup(main);

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales