/* forked from sheetjs/docs.sheetjs.com · 52 lines · 1.7 KiB · JavaScript */
|
const fs = require("fs");
|
||
|
const PSTExtractor = require("pst-extractor");
|
||
|
const XLSX = require("xlsx");
|
||
|
|
||
|
/**
 * Walk a PST folder tree and add every spreadsheet attachment to `arr`.
 *
 * Subfolders are visited first (depth-first), then each child returned by
 * `f.getNextChild()` is scanned and every attachment whose filename ends in
 * `.xls`, `.xlsx`, `.xlsm` or `.xlsb` (any letter case) is appended.
 *
 * @param {object} f - folder exposing `hasSubfolders`, `getSubFolders()`,
 *   `contentCount` and `getNextChild()`
 * @param {Array} arr - output array; matching attachments are pushed onto it
 * @returns {void}
 */
function walk(f, arr) {
  if (f.hasSubfolders) {
    for (const sf of f.getSubFolders()) walk(sf, arr);
  }
  if (f.contentCount <= 0) return;

  /* getNextChild() advances an internal cursor and yields null when exhausted */
  for (let e = f.getNextChild(); e != null; e = f.getNextChild()) {
    for (let i = 0; i < e.numberOfAttachments; ++i) {
      const a = e.getAttachment(i);
      /* XLS spreadsheet test by filename: the dot is escaped so names such as
         "taxls" are not mistaken for spreadsheets, and /i accepts ".XLS" */
      if (/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
    }
  }
}
|
||
|
|
||
|
/**
 * Read the contents of an attachment into a freshly allocated "Buffer".
 *
 * @param {object} file - attachment exposing a `fileInputStream`
 * @returns {Buffer} buffer that was passed to the stream's `readCompletely`
 */
function collect(file) {
  const { fileInputStream: input } = file;
  /* size comes from `_length.low`; presumably the low 32 bits of a 64-bit
     length value -- TODO confirm against the pst-extractor stream type */
  const size = input._length.low;
  const out = Buffer.alloc(size);
  input.readCompletely(out);
  return out;
}
|
||
|
|
||
|
/* script entry point: download the sample PST, save every spreadsheet
   attachment to disk and print each worksheet as CSV */
(async () => {
  /* fetch https://docs.sheetjs.com/pst/enron.pst */
  const url = "https://docs.sheetjs.com/pst/enron.pst";
  const res = await fetch(url);
  /* fail early on HTTP errors instead of feeding an error page to the PST parser */
  if (!res.ok) throw new Error(`failed to fetch ${url}: HTTP ${res.status}`);
  const ab = await res.arrayBuffer();
  const pst = new PSTExtractor.PSTFile(Buffer.from(ab));

  /* generate a list of attachments */
  const files = [];
  walk(pst.getRootFolder(), files);

  for (const [idx, file] of files.entries()) {
    /* extract and save workbook to file */
    const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1);
    console.log(`saving file ${idx} |${file.filename}| to file${idx}.${ext}`);
    const buf = collect(file);
    fs.writeFileSync(`file${idx}.${ext}`, buf);

    /* parse workbook and print CSV contents of each sheet */
    const wb = XLSX.read(buf);
    for (const n of wb.SheetNames) {
      const ws = wb.Sheets[n];
      const csv = XLSX.utils.sheet_to_csv(ws);
      console.log(`#### ${file.filename} ! ${n}`);
      console.log(csv);
    }
  }
})().catch((err) => {
  /* report the failure and signal it through the exit status rather than
     leaving the promise rejection unhandled */
  console.error(err);
  process.exitCode = 1;
});
|