From 66c787222ee48039589bdbb979e806db15fdf281 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Sun, 22 Oct 2023 21:20:18 -0400 Subject: [PATCH] pst --- docz/docs/03-demos/01-frontend/03-angular.md | 22 +- .../01-frontend/19-bundler/20-parcel.md | 4 +- .../03-demos/03-net/02-server/01-express.md | 2 +- .../03-demos/03-net/02-server/04-drash.md | 2 +- .../03-demos/03-net/02-server/09-elysia.md | 2 +- .../03-demos/03-net/02-server/11-nestjs.md | 2 +- .../03-demos/03-net/02-server/19-fastify.md | 2 +- docz/docs/03-demos/03-net/02-server/index.md | 2 +- docz/docs/03-demos/03-net/04-email/11-pst.md | 388 ++++++++++++++++++ .../03-demos/03-net/04-email/_category_.json | 4 + .../03-net/{03-email.md => 04-email/index.md} | 102 +---- docz/docs/03-demos/03-net/08-headless.md | 1 + docz/docs/07-csf/07-features/03-hyperlinks.md | 18 +- docz/static/pst/SheetJSPST.js | 51 +++ 14 files changed, 483 insertions(+), 119 deletions(-) create mode 100644 docz/docs/03-demos/03-net/04-email/11-pst.md create mode 100644 docz/docs/03-demos/03-net/04-email/_category_.json rename docz/docs/03-demos/03-net/{03-email.md => 04-email/index.md} (77%) create mode 100644 docz/static/pst/SheetJSPST.js diff --git a/docz/docs/03-demos/01-frontend/03-angular.md b/docz/docs/03-demos/01-frontend/03-angular.md index 52d95d1..da55c7d 100644 --- a/docz/docs/03-demos/01-frontend/03-angular.md +++ b/docz/docs/03-demos/01-frontend/03-angular.md @@ -156,7 +156,7 @@ export class AppComponent { :::note -This demo was last run on 2023-07-24 using Angular CLI `16.1.5` +This demo was last run on 2023-10-22 using Angular CLI `16.2.7` ::: @@ -169,7 +169,7 @@ npx @angular/cli analytics disable -g 1) Create a new project: ```bash -npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular +npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular ``` 2) Install the SheetJS dependency and start the dev server: @@ -184,10 +184,11 @@ npm start`} 3) Open a web browser and access the 
displayed URL (`http://localhost:4200`) -4) Replace `src/app/app.component.ts` with the code snippet. +4) Replace `src/app/app.component.ts` with the previous code snippet. -The page will refresh and show a table with an Export button. Click the button -and the page will attempt to download `SheetJSAngularAoO.xlsx`. +The page will refresh and show a table with an Export button. Click the button +and the page will attempt to download `SheetJSAngularAoO.xlsx`. Open the file +with a spreadsheet editor. 5) Stop the dev server and build the site: @@ -263,7 +264,7 @@ export class AppComponent { :::note -This demo was last run on 2023-07-24 using Angular CLI `16.1.5` +This demo was last run on 2023-10-22 using Angular CLI `16.2.7` ::: @@ -276,7 +277,7 @@ npx @angular/cli analytics disable -g 1) Create a new project: ```bash -npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular +npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular ``` 2) Install the SheetJS dependency and start the dev server: @@ -291,10 +292,11 @@ npm start`} 3) Open a web browser and access the displayed URL (`http://localhost:4200`) -4) Replace `src/app/app.component.ts` with the code snippet. +4) Replace `src/app/app.component.ts` with the previous code snippet. -The page will refresh and show a table with an Export button. Click the button -and the page will attempt to download `SheetJSAngularHTML.xlsx`. +The page will refresh and show a table with an Export button. Click the button +and the page will attempt to download `SheetJSAngularHTML.xlsx`. Open the file +with a spreadsheet editor. 
5) Stop the dev server and build the site: diff --git a/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md b/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md index b5d87bb..334e3c6 100644 --- a/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md +++ b/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md @@ -40,7 +40,7 @@ import { read, utils, writeFileXLSX } from 'xlsx'; :::warning Parcel Bug Errors of the form `Could not statically evaluate fs call` stem from a Parcel -bug. Upgrade to Parcel version 1.5.0 or later. +bug[^1]. Upgrade to Parcel version 1.5.0 or later. ::: @@ -164,3 +164,5 @@ npx http-server dist Access the displayed URL (typically `http://localhost:8080/`) in a web browser. Click on "Click here to export" to generate a file. + +[^1]: See [Issue 523 in the Parcel issue tracker](https://github.com/parcel-bundler/parcel/pull/523#issuecomment-357486164) \ No newline at end of file diff --git a/docz/docs/03-demos/03-net/02-server/01-express.md b/docz/docs/03-demos/03-net/02-server/01-express.md index c7680ac..0f70427 100644 --- a/docz/docs/03-demos/03-net/02-server/01-express.md +++ b/docz/docs/03-demos/03-net/02-server/01-express.md @@ -2,7 +2,7 @@ title: Sheets in ExpressJS sidebar_label: ExpressJS pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/02-server/04-drash.md b/docz/docs/03-demos/03-net/02-server/04-drash.md index 0f4881c..dfcb8f1 100644 --- a/docz/docs/03-demos/03-net/02-server/04-drash.md +++ b/docz/docs/03-demos/03-net/02-server/04-drash.md @@ -2,7 +2,7 @@ title: Sheets in Drash sidebar_label: Drash pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/02-server/09-elysia.md b/docz/docs/03-demos/03-net/02-server/09-elysia.md index ee0925f..30c5a15 100644 --- 
a/docz/docs/03-demos/03-net/02-server/09-elysia.md +++ b/docz/docs/03-demos/03-net/02-server/09-elysia.md @@ -2,7 +2,7 @@ title: Sheets in Elysia sidebar_label: ElysiaJS pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/02-server/11-nestjs.md b/docz/docs/03-demos/03-net/02-server/11-nestjs.md index 88a2df8..549c9d1 100644 --- a/docz/docs/03-demos/03-net/02-server/11-nestjs.md +++ b/docz/docs/03-demos/03-net/02-server/11-nestjs.md @@ -2,7 +2,7 @@ title: Sheets in NestJS sidebar_label: NestJS pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/02-server/19-fastify.md b/docz/docs/03-demos/03-net/02-server/19-fastify.md index 00fda2f..5898355 100644 --- a/docz/docs/03-demos/03-net/02-server/19-fastify.md +++ b/docz/docs/03-demos/03-net/02-server/19-fastify.md @@ -2,7 +2,7 @@ title: Sheets in FastifyJS sidebar_label: FastifyJS pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/02-server/index.md b/docz/docs/03-demos/03-net/02-server/index.md index 4567d8e..360f703 100644 --- a/docz/docs/03-demos/03-net/02-server/index.md +++ b/docz/docs/03-demos/03-net/02-server/index.md @@ -1,7 +1,7 @@ --- title: HTTP Server Processing pagination_prev: demos/net/network -pagination_next: demos/net/email +pagination_next: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/03-demos/03-net/04-email/11-pst.md b/docz/docs/03-demos/03-net/04-email/11-pst.md new file mode 100644 index 0000000..f5daa7f --- /dev/null +++ b/docz/docs/03-demos/03-net/04-email/11-pst.md @@ -0,0 +1,388 @@ +--- +title: Sheets in PST Mailboxes +sidebar_label: PST Mailboxes 
+pagination_prev: demos/net/server/index +pagination_next: demos/net/headless +--- + +import current from '/version.js'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; + + + + + +PST (Personal Storage Table) is a common file format for storing messages. +Electronic discovery commonly involves extracting data from attached +spreadsheets in e-mail messages stored in PST archives. + +`pst-extractor`[^1] is a NodeJS module designed for extracting objects from PST +files. It has been used to extract spreadsheets from the Enron Corpus[^2] and +other large mailboxes. + +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +This demo uses `pst-extractor` and SheetJS to read spreadsheets. We'll explore +how to load SheetJS in a NodeJS script or website, extract spreadsheet files, +and generate HTML and CSV views of the underlying data. + +The ["Live Demo"](#live-demo) reads PST files. Individual spreadsheets within +the file can be downloaded or previewed in the browser. + +:::note + +This demo was last tested on 2023 October 22 against `pst-extractor` 1.9.0 + +::: + +## Overview + +The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be +imported from scripts that use `pst-extractor`. + +### Parsing PST Files + +The `pst-extractor` module exposes a `PSTFile` class. The constructor requires a +proper NodeJS buffer. + +The following snippet reads and parses `enron.pst` from the local filesystem. +`fs.readFileSync`[^3] accepts a filename and returns a Buffer: + +```js +const fs = require("fs"), PSTExtractor = require("pst-extractor"); +const file = fs.readFileSync("enron.pst"); +const pst = new (PSTExtractor.PSTFile)(file); +``` + +### Walking the Tree + +`pst-extractor` presents a tree-like structure to inspect the contents of the +PST file. It is recommended to use recursive functions to walk the tree. 
+ +The following tree walker will collect all XLSX and XLS attachments: + +```js +/* walk the PST file and add all attachments to the specified array */ +function walk(f,arr) { + if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr); + if(f.contentCount <= 0) return; + for(let e = f.getNextChild(); e != null; e = f.getNextChild()) { + for(let i = 0; i < e.numberOfAttachments; ++i) { + var a = e.getAttachment(i); + /* XLS spreadsheet test by filename */ + if(/.xls[xmb]?$/.test(a.filename)) arr.push(a); + } + } +} + +/* generate a list of attachments */ +const files = []; +walk(pst.getRootFolder(), files); +``` + +### Generating Buffers + +The `PSTAttachment` class holds attachment metadata. To avoid loading everything +in memory, the raw data is exposed as a custom stream object. Since the SheetJS +`read` function requires data in a `Buffer` or `Uint8Array`, a helper function +is used to collect the data: + +```js +/* collect data from the attachment into a "Buffer" */ +function collect(file) { + const strm = file.fileInputStream; + const data = Buffer.alloc(strm._length.low); + strm.readCompletely(data); + return data; +} + +/* collect data from the first attachment */ +const buf0 = collect(files[0]); +``` + +### Processing Attachments + +Given a NodeJS Buffer, the SheetJS `read` method[^4] parses the data and returns +a workbook object[^5]. Individual worksheets can be extracted from the workbook +and converted to CSV[^6] or HTML[^7]. + +The following example prints the contents of each worksheet in CSV form: + +```js +const XLSX = require("xlsx"); + +/* parse workbook and print CSV contents of each sheet */ +const wb = XLSX.read(buf0); +wb.SheetNames.forEach(n => { + const ws = wb.Sheets[n]; + const csv = XLSX.utils.sheet_to_csv(ws); + console.log(`#### ${file.filename} ! ${n}`); + console.log(csv); +}); +``` + +### Browser Caveats + +The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone) +can be loaded through a `SCRIPT` tag. 
+ +This demo uses [a special `pst-extractor` build](#browser-build) for the web. + +Compared to the NodeJS build, browser scripts require special Buffer wrappers. +For example, the following function will fail since the library does not support +`ArrayBuffer` objects: + +```js +async function error_fetch_and_parse_pst(url) { + const ab = await (await fetch(url)).arrayBuffer(); + // this will throw an error + return new (PSTExtractor.PSTFile)(ab); +} +``` + +The browser build exposes the `Buffer` object in the `PSTExtractor` global: + +```js +async function correct_fetch_and_parse_pst(url) { + const ab = await (await fetch(url)).arrayBuffer(); +// highlight-next-line + const buf = new PSTExtractor.Buffer(ab); + return new (PSTExtractor.PSTFile)(buf); +} +``` + +### Browser Build + +The `pst-extractor` library is designed for NodeJS. Parts of the library expect +a NodeJS `Buffer`, which does not exist in the browser. A fake `Buffer` can be +added and exposed in a script. + +[`pstextractor.js`](pathname:///pst/pstextractor.js) is loaded in the demo page. + +
Build instructions (click to show) + +1) Initialize a new NodeJS project and install the dependency: + +```bash +mkdir pstextract +cd pstextract +npm init -y +npm i --save pst-extractor@1.9.0 +``` + +2) Save the following to `shim.js`: + +```js title="shim.js" +const PSTExtractor = require("pst-extractor"); +module.exports = PSTExtractor; +module.exports.Buffer = Buffer; +``` + +3) Build the script: + +```bash +npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js +``` + +
+ +## Demos + +### NodeJS + +This demo will fetch a [test PST](pathname:///pst/enron.pst) and extract all +embedded spreadsheets. The script can be adapted to read local PST files or pull +PST files from a different URL. + +0) Initialize a new project: + +```bash +mkdir sheetjs-pst +cd sheetjs-pst +npm init -y +``` + +1) Install the SheetJS NodeJS module and `pst-extractor`: + + + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`} + + + +{`\ +pnpm install https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`} + + + +{`\ +yarn add https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`} + + + + +2) Download [`SheetJSPST.js`](pathname:///pst/SheetJSPST.js) into project folder: + +```bash +curl -LO https://docs.sheetjs.com/pst/SheetJSPST.js +``` + +3) Run the script: + +```bash +node SheetJSPST.js +``` + +The process will fetch [the test PST](pathname:///pst/enron.pst) and extract +the embedded spreadsheets. The terminal will display info on the exported files. + +:::note pass + +Lines starting with `saving file` show how attachments correspond to files. The +following line states that the first attachment (index `0`) was originally named +`RedRockA.xls` and was saved to `file0.xls` on the file system: + +``` +saving file 0 |RedRockA.xls| to file0.xls +``` + +Lines starting with `####` show the attachment file name and the worksheet name. +The following line explains that there is a worksheet named `"Oct 26, 2001"` in +the file `RedRockA.xls`: + +``` +#### RedRockA.xls ! Oct 26, 2001 +``` + +Every other line is a CSV row from the named worksheet. For example, the first +four lines of worksheet `"Oct 26, 2001"` in `RedRockA.xls` are shown below: + +```text +#### RedRockA.xls ! 
Oct 26, 2001 +// highlight-start +RED ROCK EXPANSION PROJECT,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,, +,,,, , , ,,,,,,,,,,,, +SHIPPER,CONTRACT #,Term,MMBtu/d,RECEIPT POINT,DELIVERY POINT,MMBtu/d,,,,,,,,,,,, +// highlight-end +``` + +::: + +### Live Demo + +This demo reads PST mailboxes. Due to browser limitations, PST files larger than +100 MB may crash the browser. + +After parsing the PST file, the "Attachments" table will list attached XLSX and +XLS spreadsheets in the file. The "preview" link will display a HTML table with +the data in the spreadsheet. The "download" link will download the attachment. + +The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract +from the "Enron Corpus" and includes a few XLS attachments. + +:::caution pass + +If the live demo shows a message + +``` +Please reload the page +``` + +please refresh the page. This is a known bug in the documentation generator. + +::: + +```jsx live +function SheetJSPreviewPSTSheets() { + const [ files, setFiles ] = React.useState([]); + const [ __html, setHTML ] = React.useState(""); + + /* recursively walk PST and collect attachments */ + const walk = (f,arr) => { + if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr); + if(f.contentCount <= 0) return; + for(let e = f.getNextChild(); e != null; e = f.getNextChild()) { + for(let i = 0; i < e.numberOfAttachments; ++i) { + var a = e.getAttachment(i); + /* XLS spreadsheet test by filename */ + if(/.xls[xmb]?$/.test(a.filename)) arr.push(a); + } + } + } + + /* collect data from the attachment into a "Buffer" */ + const collect = (j) => { + const strm = files[j].fileInputStream; + const data = new PSTExtractor.Buffer(strm._length.low); + strm.readCompletely(data); + return data; + } + + /* view selected attachment */ + const view = (j) => { + const data = collect(j); + + /* parse */ + const wb = XLSX.read(data); + + /* convert first sheet to HTML */ + const ws = wb.Sheets[wb.SheetNames[0]]; + 
setHTML(XLSX.utils.sheet_to_html(ws)); + } + + /* process array buffer */ + const process_ab = (ab) => { + const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab)); + const data = []; + walk(pst.getRootFolder(), data); + setFiles(data); + }; + + /* on click, fetch and process file */ + const doit = async() => { + const ab = await (await fetch("/pst/enron.pst")).arrayBuffer(); + process_ab(ab); + }; + const chg = async(e) => process_ab(await e.target.files[0].arrayBuffer()); + + /* download selected attachment */ + const dl = (j) => { + const a = document.createElement("a"); + a.download = files[j].filename; + a.href = URL.createObjectURL(new Blob([collect(j)])); + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + } + + if(typeof PSTExtractor == "undefined") return Please reload the page; + return ( <> +

Use the file input to select a file, or click "Use a Sample PST"

+ +

+ + {files.map((f,j) => ( + + + + + ))} +
Attachments
{f.filename}view(j)}>(preview)dl(j)}>(download)
+ Preview of first worksheet
+
+ ); +} +``` + +[^1]: The project has no official website. The official [repository](https://github.com/epfromer/pst-extractor) is hosted on GitHub. +[^2]: Extracted spreadsheets are [available on GitHub](https://github.com/SheetJS/enron_xls) +[^3]: See [`fs.readFileSync`](https://nodejs.org/api/fs.html#fsreadfilesyncpath-options) in the NodeJS documentation +[^4]: See [`read` in "Reading Files"](/docs/api/parse-options) +[^5]: See ["Workbook Object"](/docs/csf/book) +[^6]: See [`sheet_to_csv` in "CSV and Text"](/docs/api/utilities/csv#delimiter-separated-output) +[^7]: See [`sheet_to_html` in "Utilities"](/docs/api/utilities/html#html-table-output) diff --git a/docz/docs/03-demos/03-net/04-email/_category_.json b/docz/docs/03-demos/03-net/04-email/_category_.json new file mode 100644 index 0000000..e1a9a31 --- /dev/null +++ b/docz/docs/03-demos/03-net/04-email/_category_.json @@ -0,0 +1,4 @@ +{ + "label": "Electronic Mail", + "position": 4 +} \ No newline at end of file diff --git a/docz/docs/03-demos/03-net/03-email.md b/docz/docs/03-demos/03-net/04-email/index.md similarity index 77% rename from docz/docs/03-demos/03-net/03-email.md rename to docz/docs/03-demos/03-net/04-email/index.md index e536b14..5175afd 100644 --- a/docz/docs/03-demos/03-net/03-email.md +++ b/docz/docs/03-demos/03-net/04-email/index.md @@ -1,16 +1,12 @@ --- title: Electronic Mail pagination_prev: demos/net/server/index +pagination_next: demos/net/headless --- import current from '/version.js'; import CodeBlock from '@theme/CodeBlock'; - - - - - Electronic mail ("email" or "e-mail") is an essential part of modern business workflows. Spreadsheets are commonly passed around and processed. @@ -367,98 +363,4 @@ proprietary mail and email account file formats. ### PST -`PST` is a common file format. The `pst-extractor` library is designed for -extracting messages and attachments from `PST` files in NodeJS and the browser. 
- -This demo uses [a special build](pathname:///pst/pstextractor.js) for the web. - -
Build details (click to show) - -1) Initialize a new NodeJS project and install the dependency: - -```bash -mkdir pstextract -cd pstextract -npm init -y -npm i --save pst-extractor@1.9.0 -``` - -2) Save the following to `shim.js`: - -```js title="shim.js" -const PSTExtractor = require("pst-extractor"); -module.exports = PSTExtractor; -module.exports.Buffer = Buffer; -``` - -3) Build the script: - -```bash -npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js -``` - -
- -The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract -from the "Enron Corpus" and includes a few XLS attachments. - -```jsx live -function SheetJSPreviewPSTSheets() { - const [ files, setFiles ] = React.useState([]); - const [ __html, setHTML ] = React.useState(""); - - /* recursively walk PST and collect attachments */ - const walk = (f,arr) => { - if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr); - if(f.contentCount > 0) for(let e = f.getNextChild(); e != null; e = f.getNextChild()) { - for(var i = 0; i < e.numberOfAttachments; ++i) { - var a = e.getAttachment(i); - /* XLS spreadsheet test by filename */ - if(a.filename.endsWith(".xls")) arr.push(a); - } - } - } - - /* view selected attachment */ - const view = (j) => { - /* collect data into a "Buffer" */ - const strm = files[j].fileInputStream; - const data = new PSTExtractor.Buffer(strm._length.low); - strm.readCompletely(data); - - /* parse */ - const wb = XLSX.read(data); - - /* convert first sheet to HTML */ - const ws = wb.Sheets[wb.SheetNames[0]]; - setHTML(XLSX.utils.sheet_to_html(ws)); - } - - /* process array buffer */ - const process_ab = (ab) => { - const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab)); - const data = []; - walk(pst.getRootFolder(), data); - setFiles(data); - }; - - - /* on click, fetch and process file */ - const doit = async() => { - const ab = await (await fetch("/pst/enron.pst")).arrayBuffer(); - process_ab(ab); - }; - const chg = async(e) => process_ab(await e.target.files[0].arrayBuffer()); - - return ( <> -

Use the file input to select a file, or click "Use a Sample PST"

-

-
- Attachments - - Table View
-
- ); -} \ No newline at end of file +**[The exposition has been moved to a separate page.](/docs/demos/net/email/pst)** diff --git a/docz/docs/03-demos/03-net/08-headless.md b/docz/docs/03-demos/03-net/08-headless.md index 682693b..61f4dc5 100644 --- a/docz/docs/03-demos/03-net/08-headless.md +++ b/docz/docs/03-demos/03-net/08-headless.md @@ -1,5 +1,6 @@ --- title: Browser Automation +pagination_prev: demos/net/email/index --- import current from '/version.js'; diff --git a/docz/docs/07-csf/07-features/03-hyperlinks.md b/docz/docs/07-csf/07-features/03-hyperlinks.md index 7f10b6e..4d7f6b3 100644 --- a/docz/docs/07-csf/07-features/03-hyperlinks.md +++ b/docz/docs/07-csf/07-features/03-hyperlinks.md @@ -1,9 +1,9 @@ --- +title: Hyperlinks and Tooltips +sidebar_label: Hyperlinks sidebar_position: 3 --- -# Hyperlinks -
File Format Support (click to show) @@ -254,6 +254,20 @@ XLSX documents. A workaround was added in library version 0.18.12. ::: +## Tooltips + +Tooltips are attached to hyperlink information. There is no way to specify a +tooltip without assigning a cell link. + +:::warning pass + +**Excel has an undocumented tooltip length limit of 255 characters.** + +Writing longer tooltips is currently permitted by the library but the generated +files will not open in Excel. + +::: + ## HTML The HTML DOM parser[^1] will process `` links in the table. diff --git a/docz/static/pst/SheetJSPST.js b/docz/static/pst/SheetJSPST.js new file mode 100644 index 0000000..d0134f4 --- /dev/null +++ b/docz/static/pst/SheetJSPST.js @@ -0,0 +1,51 @@ +const fs = require("fs"); +const PSTExtractor = require("pst-extractor"); +const XLSX = require("xlsx"); + +/* walk the PST file and add all attachments to the specified array */ +function walk(f,arr) { + if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr); + if(f.contentCount <= 0) return; + for(let e = f.getNextChild(); e != null; e = f.getNextChild()) { + for(let i = 0; i < e.numberOfAttachments; ++i) { + var a = e.getAttachment(i); + /* XLS spreadsheet test by filename */ + if(/.xls[xmb]?$/.test(a.filename)) arr.push(a); + } + } +} + +/* collect data from the attachment into a "Buffer" */ +function collect(file) { + const strm = file.fileInputStream; + const data = Buffer.alloc(strm._length.low); + strm.readCompletely(data); + return data; +} + +(async() => { + /* fetch https://docs.sheetjs.com/pst/enron.pst */ + const ab = await (await fetch("https://docs.sheetjs.com/pst/enron.pst")).arrayBuffer(); + const pst = new (PSTExtractor.PSTFile)(Buffer.from(ab)); + + /* generate a list of attachments */ + const files = []; + walk(pst.getRootFolder(), files); + + files.forEach((file, idx) => { + /* extract and save workbook to file */ + const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1); + console.log(`saving file ${idx} 
|${file.filename}| to file${idx}.${ext}`); + const buf = collect(file); + fs.writeFileSync(`file${idx}.${ext}`, buf); + + /* parse workbook and print CSV contents of each sheet */ + const wb = XLSX.read(buf); + wb.SheetNames.forEach(n => { + const ws = wb.Sheets[n]; + const csv = XLSX.utils.sheet_to_csv(ws); + console.log(`#### ${file.filename} ! ${n}`); + console.log(csv); + }); + }); +})();