pst

2023-10-22 21:20:18 -04:00 · 2023-10-22 21:20:18 -04:00 · 66c787222e
commit 66c787222e
parent 35d76f9a62
14 changed files with 483 additions and 119 deletions
--- a/docz/docs/03-demos/01-frontend/03-angular.md
+++ b/docz/docs/03-demos/01-frontend/03-angular.md
@ -156,7 +156,7 @@ export class AppComponent {

 :::note

-This demo was last run on 2023-07-24 using Angular CLI `16.1.5`
+This demo was last run on 2023-10-22 using Angular CLI `16.2.7`

 :::

@ -169,7 +169,7 @@ npx @angular/cli analytics disable -g
 1) Create a new project:

 ```bash
-npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular
+npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular
 ```

 2) Install the SheetJS dependency and start the dev server:
@ -184,10 +184,11 @@ npm start`}

 3) Open a web browser and access the displayed URL (`http://localhost:4200`)

-4) Replace `src/app/app.component.ts` with the code snippet.
+4) Replace `src/app/app.component.ts` with the previous code snippet.

-The page will refresh and show a table with an Export button.  Click the button
-and the page will attempt to download `SheetJSAngularAoO.xlsx`.
+The page will refresh and show a table with an Export button. Click the button
+and the page will attempt to download `SheetJSAngularAoO.xlsx`. Open the file
+with a spreadsheet editor.

 5) Stop the dev server and build the site:

@ -263,7 +264,7 @@ export class AppComponent {

 :::note

-This demo was last run on 2023-07-24 using Angular CLI `16.1.5`
+This demo was last run on 2023-10-22 using Angular CLI `16.2.7`

 :::

@ -276,7 +277,7 @@ npx @angular/cli analytics disable -g
 1) Create a new project:

 ```bash
-npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular
+npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular
 ```

 2) Install the SheetJS dependency and start the dev server:
@ -291,10 +292,11 @@ npm start`}

 3) Open a web browser and access the displayed URL (`http://localhost:4200`)

-4) Replace `src/app/app.component.ts` with the code snippet.
+4) Replace `src/app/app.component.ts` with the previous code snippet.

-The page will refresh and show a table with an Export button.  Click the button
-and the page will attempt to download `SheetJSAngularHTML.xlsx`.
+The page will refresh and show a table with an Export button. Click the button
+and the page will attempt to download `SheetJSAngularHTML.xlsx`. Open the file
+with a spreadsheet editor.

 5) Stop the dev server and build the site:

--- a/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md
+++ b/docz/docs/03-demos/01-frontend/19-bundler/20-parcel.md
@ -40,7 +40,7 @@ import { read, utils, writeFileXLSX } from 'xlsx';
 :::warning Parcel Bug

 Errors of the form `Could not statically evaluate fs call` stem from a Parcel
-bug. Upgrade to Parcel version 1.5.0 or later.
+bug[^1]. Upgrade to Parcel version 1.5.0 or later.

 :::

@ -164,3 +164,5 @@ npx http-server dist

 Access the displayed URL (typically `http://localhost:8080/`) in a web browser.
 Click on "Click here to export" to generate a file.
+
+[^1]: See [the discussion in Pull Request 523 of the Parcel repository](https://github.com/parcel-bundler/parcel/pull/523#issuecomment-357486164)
--- a/docz/docs/03-demos/03-net/02-server/01-express.md
+++ b/docz/docs/03-demos/03-net/02-server/01-express.md
@ -2,7 +2,7 @@
 title: Sheets in ExpressJS
 sidebar_label: ExpressJS
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/02-server/04-drash.md
+++ b/docz/docs/03-demos/03-net/02-server/04-drash.md
@ -2,7 +2,7 @@
 title: Sheets in Drash
 sidebar_label: Drash
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/02-server/09-elysia.md
+++ b/docz/docs/03-demos/03-net/02-server/09-elysia.md
@ -2,7 +2,7 @@
 title: Sheets in Elysia
 sidebar_label: ElysiaJS
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/02-server/11-nestjs.md
+++ b/docz/docs/03-demos/03-net/02-server/11-nestjs.md
@ -2,7 +2,7 @@
 title: Sheets in NestJS
 sidebar_label: NestJS
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/02-server/19-fastify.md
+++ b/docz/docs/03-demos/03-net/02-server/19-fastify.md
@ -2,7 +2,7 @@
 title: Sheets in FastifyJS
 sidebar_label: FastifyJS
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/02-server/index.md
+++ b/docz/docs/03-demos/03-net/02-server/index.md
@ -1,7 +1,7 @@
 ---
 title: HTTP Server Processing
 pagination_prev: demos/net/network
-pagination_next: demos/net/email
+pagination_next: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/03-demos/03-net/04-email/11-pst.md
+++ b/docz/docs/03-demos/03-net/04-email/11-pst.md
@ -0,0 +1,388 @@
+---
+title: Sheets in PST Mailboxes
+sidebar_label: PST Mailboxes
+pagination_prev: demos/net/server/index
+pagination_next: demos/net/headless
+---
+
+import current from '/version.js';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import CodeBlock from '@theme/CodeBlock';
+
+<head>
+  <script src="/pst/pstextractor.js"></script>
+</head>
+
+PST (Personal Storage Table) is a common file format for storing messages.
+Electronic discovery commonly involves extracting data from attached
+spreadsheets in e-mail messages stored in PST archives.
+
+`pst-extractor`[^1] is a NodeJS module designed for extracting objects from PST
+files. It has been used to extract spreadsheets from the Enron Corpus[^2] and
+other large mailboxes.
+
+[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
+data from spreadsheets.
+
+This demo uses `pst-extractor` and SheetJS to read spreadsheets. We'll explore
+how to load SheetJS in a NodeJS script or website, extract spreadsheet files,
+and generate HTML and CSV views of the underlying data.
+
+The ["Live Demo"](#live-demo) reads PST files. Individual spreadsheets within
+the file can be downloaded or previewed in the browser.
+
+:::note
+
+This demo was last tested on 2023 October 22 against `pst-extractor` 1.9.0
+
+:::
+
+## Overview
+
+The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be
+imported from scripts that use `pst-extractor`.
+
+### Parsing PST Files
+
+The `pst-extractor` module exposes a `PSTFile` class. The constructor requires a
+proper NodeJS buffer.
+
+The following snippet reads and parses `enron.pst` from the local filesystem.
+`fs.readFileSync`[^3] accepts a filename and returns a Buffer:
+
+```js
+/* load the NodeJS filesystem module and the pst-extractor library */
+const fs = require("fs");
+const PSTExtractor = require("pst-extractor");
+
+/* read the mailbox from disk and hand the resulting Buffer to the parser */
+const file = fs.readFileSync("enron.pst");
+const pst = new PSTExtractor.PSTFile(file);
+```
+
+### Walking the Tree
+
+`pst-extractor` presents a tree-like structure to inspect the contents of the
+PST file. It is recommended to use recursive functions to walk the tree.
+
+The following tree walker will collect all XLSX and XLS attachments:
+
+```js
+/**
+ * Recursively walk a PST folder and append spreadsheet attachments to `arr`.
+ *
+ * @param {object} f   folder to scan (callers start with `pst.getRootFolder()`)
+ * @param {Array}  arr output array; matching attachment objects are pushed onto it
+ */
+function walk(f,arr) {
+  /* subfolders are visited before the items stored directly in this folder */
+  if(f.hasSubfolders) for(const sf of f.getSubFolders()) walk(sf,arr);
+  /* nothing to scan when the folder holds no items */
+  if(f.contentCount <= 0) return;
+  /* keep pulling items until `getNextChild` yields null or undefined */
+  for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
+    for(let i = 0; i < e.numberOfAttachments; ++i) {
+      const a = e.getAttachment(i);
+      /* spreadsheet test by filename: the dot is escaped so that only a real
+         ".xls" / ".xlsx" / ".xlsm" / ".xlsb" extension matches, and the `i`
+         flag accepts upper-case names such as "REPORT.XLS" */
+      if(/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
+    }
+  }
+}
+
+/* generate a list of attachments */
+const files = [];
+walk(pst.getRootFolder(), files);
+```
+
+### Generating Buffers
+
+The `PSTAttachment` class holds attachment metadata. To avoid loading everything
+in memory, the raw data is exposed as a custom stream object. Since the SheetJS
+`read` function requires data in a `Buffer` or `Uint8Array`, a helper function
+is used to collect the data:
+
+```js
+/* read the complete attachment stream into a newly allocated "Buffer" */
+function collect(file) {
+  const { fileInputStream: stream } = file;
+  /* allocate as many bytes as the stream reports in `_length.low` */
+  const bytes = Buffer.alloc(stream._length.low);
+  stream.readCompletely(bytes);
+  return bytes;
+}
+
+/* collect data from the first attachment */
+const buf0 = collect(files[0]);
+```
+
+### Processing Attachments
+
+Given a NodeJS Buffer, the SheetJS `read` method[^4] parses the data and returns
+a workbook object[^5]. Individual worksheets can be extracted from the workbook
+and converted to CSV[^6] or HTML[^7].
+
+The following example prints the contents of each worksheet in CSV form:
+
+```js
+const XLSX = require("xlsx");
+
+/* parse workbook and print CSV contents of each sheet */
+const wb = XLSX.read(buf0);
+wb.SheetNames.forEach(n => {
+  const ws = wb.Sheets[n];
+  const csv = XLSX.utils.sheet_to_csv(ws);
+  /* `buf0` was collected from `files[0]`, so label the output with the name
+     of that attachment */
+  console.log(`#### ${files[0].filename} ! ${n}`);
+  console.log(csv);
+});
+```
+
+### Browser Caveats
+
+The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone)
+can be loaded through a `SCRIPT` tag.
+
+This demo uses [a special `pst-extractor` build](#browser-build) for the web.
+
+Compared to the NodeJS build, browser scripts require special Buffer wrappers.
+For example, the following function will fail since the library does not support
+`ArrayBuffer` objects:
+
+```js
+/* counter-example: the raw ArrayBuffer is passed straight to the parser */
+async function error_fetch_and_parse_pst(url) {
+  const response = await fetch(url);
+  const ab = await response.arrayBuffer();
+  // this will throw an error
+  return new PSTExtractor.PSTFile(ab);
+}
+```
+
+The browser build exposes the `Buffer` object in the `PSTExtractor` global:
+
+```js
+/* fetch a PST file and wrap the bytes in `PSTExtractor.Buffer` before parsing */
+async function correct_fetch_and_parse_pst(url) {
+  const response = await fetch(url);
+  const ab = await response.arrayBuffer();
+// highlight-next-line
+  const buf = new PSTExtractor.Buffer(ab);
+  return new PSTExtractor.PSTFile(buf);
+}
+```
+
+### Browser Build
+
+The `pst-extractor` library is designed for NodeJS. Parts of the library expect
+a NodeJS `Buffer`, which does not exist in the browser. A fake `Buffer` can be
+added and exposed in a script.
+
+[`pstextractor.js`](pathname:///pst/pstextractor.js) is loaded in the demo page.
+
+<details><summary><b>Build instructions</b> (click to show)</summary>
+
+1) Initialize a new NodeJS project and install the dependency:
+
+```bash
+mkdir pstextract
+cd pstextract
+npm init -y
+npm i --save pst-extractor@1.9.0
+```
+
+2) Save the following to `shim.js`:
+
+```js title="shim.js"
+/* re-export pst-extractor with `Buffer` attached as an extra property, so
+   code using the bundle can reach it as `PSTExtractor.Buffer` */
+const PSTExtractor = require("pst-extractor");
+PSTExtractor.Buffer = Buffer;
+module.exports = PSTExtractor;
+```
+
+3) Build the script:
+
+```bash
+npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js
+```
+
+</details>
+
+## Demos
+
+### NodeJS
+
+This demo will fetch a [test PST](pathname:///pst/enron.pst) and extract all
+embedded spreadsheets. The script can be adapted to read local PST files or pull
+PST files from a different URL.
+
+0) Initialize a new project:
+
+```bash
+mkdir sheetjs-pst
+cd sheetjs-pst
+npm init -y
+```
+
+1) Install the SheetJS NodeJS module and `pst-extractor`:
+
+<Tabs groupId="pm">
+  <TabItem value="npm" label="npm">
+<CodeBlock language="bash">{`\
+npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
+</CodeBlock>
+  </TabItem>
+  <TabItem value="pnpm" label="pnpm">
+<CodeBlock language="bash">{`\
+pnpm install https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
+</CodeBlock>
+  </TabItem>
+  <TabItem value="yarn" label="Yarn" default>
+<CodeBlock language="bash">{`\
+yarn add https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
+</CodeBlock>
+  </TabItem>
+</Tabs>
+
+2) Download [`SheetJSPST.js`](pathname:///pst/SheetJSPST.js) into the project folder:
+
+```bash
+curl -LO https://docs.sheetjs.com/pst/SheetJSPST.js
+```
+
+3) Run the script:
+
+```bash
+node SheetJSPST.js
+```
+
+The process will fetch [the test PST](pathname:///pst/enron.pst) and extract
+the embedded spreadsheets. The terminal will display info on the exported files.
+
+:::note pass
+
+Lines starting with `saving file` show how attachments correspond to files. The
+following line states that the first attachment (index `0`) was originally named
+`RedRockA.xls` and was saved to `file0.xls` on the file system:
+
+```
+saving file 0 |RedRockA.xls| to file0.xls
+```
+
+Lines starting with `####` show the attachment file name and the worksheet name.
+The following line explains that there is a worksheet named `"Oct 26, 2001"` in
+the file `RedRockA.xls`:
+
+```
+#### RedRockA.xls ! Oct 26, 2001
+```
+
+Every other line is a CSV row from the named worksheet. For example, the first
+four lines of worksheet `"Oct 26, 2001"` in `RedRockA.xls` are shown below:
+
+```text
+#### RedRockA.xls ! Oct 26, 2001
+// highlight-start
+RED ROCK EXPANSION PROJECT,,,,,,,,,,,,,,,,,,
+,,,,,,,,,,,,,,,,,,
+,,,, , , ,,,,,,,,,,,,
+SHIPPER,CONTRACT #,Term,MMBtu/d,RECEIPT POINT,DELIVERY POINT,MMBtu/d,,,,,,,,,,,,
+// highlight-end
+```
+
+:::
+
+### Live Demo
+
+This demo reads PST mailboxes. Due to browser limitations, PST files larger than
+100 MB may crash the browser.
+
+After parsing the PST file, the "Attachments" table will list attached XLSX and
+XLS spreadsheets in the file. The "preview" link will display an HTML table with
+the data in the spreadsheet. The "download" link will download the attachment.
+
+The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract
+from the "Enron Corpus" and includes a few XLS attachments.
+
+:::caution pass
+
+If the live demo shows a message
+
+```
+Please reload the page
+```
+
+please refresh the page.  This is a known bug in the documentation generator.
+
+:::
+
+```jsx live
+function SheetJSPreviewPSTSheets() {
+  /* `files` holds the spreadsheet attachments found in the current PST and
+     `__html` holds the HTML preview of the selected attachment */
+  const [ files, setFiles ] = React.useState([]);
+  const [ __html, setHTML ] = React.useState("");
+
+  /* recursively walk PST and collect attachments */
+  const walk = (f,arr) => {
+    if(f.hasSubfolders) for(const sf of f.getSubFolders()) walk(sf,arr);
+    if(f.contentCount <= 0) return;
+    for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
+      for(let i = 0; i < e.numberOfAttachments; ++i) {
+        const a = e.getAttachment(i);
+        /* spreadsheet test by filename: the dot is escaped so that only a real
+           ".xls" / ".xlsx" / ".xlsm" / ".xlsb" extension matches, and the `i`
+           flag accepts upper-case names such as "REPORT.XLS" */
+        if(/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
+      }
+    }
+  }
+
+  /* collect data from the attachment at index `j` into a "Buffer" */
+  const collect = (j) => {
+    const strm = files[j].fileInputStream;
+    const data = new PSTExtractor.Buffer(strm._length.low);
+    strm.readCompletely(data);
+    return data;
+  }
+
+  /* view selected attachment */
+  const view = (j) => {
+    const data = collect(j);
+
+    /* parse */
+    const wb = XLSX.read(data);
+
+    /* convert first sheet to HTML */
+    const ws = wb.Sheets[wb.SheetNames[0]];
+    setHTML(XLSX.utils.sheet_to_html(ws));
+  }
+
+  /* process array buffer: parse the PST and refresh the attachment list */
+  const process_ab = (ab) => {
+    const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab));
+    const data = [];
+    walk(pst.getRootFolder(), data);
+    setFiles(data);
+  };
+
+  /* on click, fetch and process file */
+  const doit = async() => {
+    const ab = await (await fetch("/pst/enron.pst")).arrayBuffer();
+    process_ab(ab);
+  };
+
+  /* on file input change, process the selected file */
+  const chg = async(e) => {
+    const file = e.target.files[0];
+    /* guard against an empty selection (for example a cancelled file picker) */
+    if(!file) return;
+    process_ab(await file.arrayBuffer());
+  };
+
+  /* download selected attachment */
+  const dl = (j) => {
+    const url = URL.createObjectURL(new Blob([collect(j)]));
+    const a = document.createElement("a");
+    a.download = files[j].filename;
+    a.href = url;
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+    /* release the blob later so repeated downloads do not accumulate memory;
+       the delay gives the browser time to start the download */
+    setTimeout(() => URL.revokeObjectURL(url), 60000);
+  }
+
+  /* the parser is loaded by a separate script tag and may not be ready yet */
+  if(typeof PSTExtractor == "undefined") return <b>Please reload the page</b>;
+  return ( <>
+    <p>Use the file input to select a file, or click "Use a Sample PST"</p>
+    <input type="file" accept=".pst" onChange={chg}/>
+    <button onClick={doit}>Use a Sample PST!</button><br/><br/>
+    <table><thead><tr><th colSpan="3">Attachments</th></tr></thead>
+      <tbody>{files.map((f,j) => (
+        <tr key={j}><th>{f.filename}</th>
+          <td><a onClick={()=>view(j)}>(preview)</a></td>
+          <td><a onClick={()=>dl(j)}>(download)</a></td>
+        </tr>
+      ))}</tbody>
+    </table>
+    <b>Preview of first worksheet</b><br/>
+    <div dangerouslySetInnerHTML={{__html}}></div>
+  </> );
+}
+```
+
+[^1]: The project has no official website. The official [repository](https://github.com/epfromer/pst-extractor) is hosted on GitHub.
+[^2]: Extracted spreadsheets are [available on GitHub](https://github.com/SheetJS/enron_xls)
+[^3]: See [`fs.readFileSync`](https://nodejs.org/api/fs.html#fsreadfilesyncpath-options) in the NodeJS documentation
+[^4]: See [`read` in "Reading Files"](/docs/api/parse-options)
+[^5]: See ["Workbook Object"](/docs/csf/book)
+[^6]: See [`sheet_to_csv` in "CSV and Text"](/docs/api/utilities/csv#delimiter-separated-output)
+[^7]: See [`sheet_to_html` in "Utilities"](/docs/api/utilities/html#html-table-output)
--- a/docz/docs/03-demos/03-net/04-email/_category_.json
+++ b/docz/docs/03-demos/03-net/04-email/_category_.json
@ -0,0 +1,4 @@
+{
+  "label": "Electronic Mail",
+  "position": 4
+}
--- a/docz/docs/03-demos/03-net/04-email/index.md
+++ b/docz/docs/03-demos/03-net/04-email/index.md
@ -1,16 +1,12 @@
 ---
 title: Electronic Mail
 pagination_prev: demos/net/server/index
+pagination_next: demos/net/headless
 ---

 import current from '/version.js';
 import CodeBlock from '@theme/CodeBlock';

-
-<head>
-  <script src="/pst/pstextractor.js"></script>
-</head>
-
 Electronic mail ("email" or "e-mail") is an essential part of modern business
 workflows. Spreadsheets are commonly passed around and processed.

@ -367,98 +363,4 @@ proprietary mail and email account file formats.

 ### PST

-`PST` is a common file format. The `pst-extractor` library is designed for
-extracting messages and attachments from `PST` files in NodeJS and the browser.
-
-This demo uses [a special build](pathname:///pst/pstextractor.js) for the web.
-
-<details><summary><b>Build details</b> (click to show)</summary>
-
-1) Initialize a new NodeJS project and install the dependency:
-
-```bash
-mkdir pstextract
-cd pstextract
-npm init -y
-npm i --save pst-extractor@1.9.0
-```
-
-2) Save the following to `shim.js`:
-
-```js title="shim.js"
-const PSTExtractor = require("pst-extractor");
-module.exports = PSTExtractor;
-module.exports.Buffer = Buffer;
-```
-
-3) Build the script:
-
-```bash
-npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js
-```
-
-</details>
-
-The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract
-from the "Enron Corpus" and includes a few XLS attachments.
-
-```jsx live
-function SheetJSPreviewPSTSheets() {
-  const [ files, setFiles ] = React.useState([]);
-  const [ __html, setHTML ] = React.useState("");
-
-  /* recursively walk PST and collect attachments */
-  const walk = (f,arr) => {
-    if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr);
-    if(f.contentCount > 0) for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
-      for(var i = 0; i < e.numberOfAttachments; ++i) {
-        var a = e.getAttachment(i);
-        /* XLS spreadsheet test by filename */
-        if(a.filename.endsWith(".xls")) arr.push(a);
-      }
-    }
-  }
-
-  /* view selected attachment */
-  const view = (j) => {
-    /* collect data into a "Buffer" */
-    const strm = files[j].fileInputStream;
-    const data = new PSTExtractor.Buffer(strm._length.low);
-    strm.readCompletely(data);
-
-    /* parse */
-    const wb = XLSX.read(data);
-
-    /* convert first sheet to HTML */
-    const ws = wb.Sheets[wb.SheetNames[0]];
-    setHTML(XLSX.utils.sheet_to_html(ws));
-  }
-
-  /* process array buffer */
-  const process_ab = (ab) => {
-    const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab));
-    const data = [];
-    walk(pst.getRootFolder(), data);
-    setFiles(data);
-  };
-
-
-  /* on click, fetch and process file */
-  const doit = async() => {
-    const ab = await (await fetch("/pst/enron.pst")).arrayBuffer();
-    process_ab(ab);
-  };
-  const chg = async(e) => process_ab(await e.target.files[0].arrayBuffer());
-
-  return ( <>
-    <p>Use the file input to select a file, or click "Use a Sample PST"</p>
-    <button onClick={doit}>Use a Sample PST!</button><br/><br/>
-    <input type="file" accept=".pst" onChange={chg}/><br/>
-    <b>Attachments</b>
-    <ul>{files.map((f,j) => (
-      <li key={j}><a onClick={()=>view(j)}>{f.filename} (click to view)</a></li>
-    ))}</ul>
-    <b>Table View</b><br/>
-    <div dangerouslySetInnerHTML={{__html}}></div>
-  </> );
-}
+**[The exposition has been moved to a separate page.](/docs/demos/net/email/pst)**
--- a/docz/docs/03-demos/03-net/08-headless.md
+++ b/docz/docs/03-demos/03-net/08-headless.md
@ -1,5 +1,6 @@
 ---
 title: Browser Automation
+pagination_prev: demos/net/email/index
 ---

 import current from '/version.js';
--- a/docz/docs/07-csf/07-features/03-hyperlinks.md
+++ b/docz/docs/07-csf/07-features/03-hyperlinks.md
@ -1,9 +1,9 @@
 ---
+title: Hyperlinks and Tooltips
+sidebar_label: Hyperlinks
 sidebar_position: 3
 ---

-# Hyperlinks
-
 <details>
  <summary><b>File Format Support</b> (click to show)</summary>

@ -254,6 +254,20 @@ XLSX documents.  A workaround was added in library version 0.18.12.

 :::

+## Tooltips
+
+Tooltips are attached to hyperlink information. There is no way to specify a
+tooltip without assigning a cell link.
+
+:::warning pass
+
+**Excel has an undocumented tooltip length limit of 255 characters.**
+
+Writing longer tooltips is currently permitted by the library but the generated
+files will not open in Excel.
+
+:::
+
 ## HTML

 The HTML DOM parser[^1] will process `<a>` links in the table.
--- a/docz/static/pst/SheetJSPST.js
+++ b/docz/static/pst/SheetJSPST.js
@ -0,0 +1,51 @@
+const fs = require("fs");
+const PSTExtractor = require("pst-extractor");
+const XLSX = require("xlsx");
+
+/**
+ * Recursively walk a PST folder and append spreadsheet attachments to `arr`.
+ *
+ * @param {object} f   folder to scan (the script starts with `pst.getRootFolder()`)
+ * @param {Array}  arr output array; matching attachment objects are pushed onto it
+ */
+function walk(f,arr) {
+  /* subfolders are visited before the items stored directly in this folder */
+  if(f.hasSubfolders) for(const sf of f.getSubFolders()) walk(sf,arr);
+  /* nothing to scan when the folder holds no items */
+  if(f.contentCount <= 0) return;
+  /* keep pulling items until `getNextChild` yields null or undefined */
+  for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
+    for(let i = 0; i < e.numberOfAttachments; ++i) {
+      const a = e.getAttachment(i);
+      /* spreadsheet test by filename: the dot is escaped so that only a real
+         ".xls" / ".xlsx" / ".xlsm" / ".xlsb" extension matches, and the `i`
+         flag accepts upper-case names such as "REPORT.XLS" */
+      if(/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
+    }
+  }
+}
+
+/* read the complete attachment stream into a newly allocated "Buffer" */
+function collect(file) {
+  const { fileInputStream: stream } = file;
+  /* allocate as many bytes as the stream reports in `_length.low` */
+  const bytes = Buffer.alloc(stream._length.low);
+  stream.readCompletely(bytes);
+  return bytes;
+}
+
+/* entry point: download the sample PST, save every spreadsheet attachment to
+   disk and print each worksheet as CSV */
+(async() => {
+  /* fetch https://docs.sheetjs.com/pst/enron.pst */
+  const res = await fetch("https://docs.sheetjs.com/pst/enron.pst");
+  /* fail fast on an HTTP error rather than handing an error page to the PST parser */
+  if(!res.ok) throw new Error(`unable to fetch enron.pst: HTTP ${res.status}`);
+  const ab = await res.arrayBuffer();
+  const pst = new (PSTExtractor.PSTFile)(Buffer.from(ab));
+
+  /* generate a list of attachments */
+  const files = [];
+  walk(pst.getRootFolder(), files);
+
+  files.forEach((file, idx) => {
+    /* extract and save workbook to file, reusing the text after the last "."
+       of the attachment name as the extension */
+    const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1);
+    console.log(`saving file ${idx} |${file.filename}| to file${idx}.${ext}`);
+    const buf = collect(file);
+    fs.writeFileSync(`file${idx}.${ext}`, buf);
+
+    /* parse workbook and print CSV contents of each sheet */
+    const wb = XLSX.read(buf);
+    wb.SheetNames.forEach(n => {
+      const ws = wb.Sheets[n];
+      const csv = XLSX.utils.sheet_to_csv(ws);
+      console.log(`#### ${file.filename} ! ${n}`);
+      console.log(csv);
+    });
+  });
+})().catch(err => {
+  /* report any fetch / parse / write failure and exit with a non-zero status */
+  console.error(err);
+  process.exitCode = 1;
+});