2024-06-25 07:16:41 +00:00
|
|
|
#!/bin/bash
|
|
|
|
# https://docs.sheetjs.com/docs/demos/net/headless#puppeteer
|
|
|
|
|
|
|
|
# Start from a fresh scratch directory. Abort if a `cd` fails so the recursive
# delete and the later file writes can never run in an unexpected location.
cd /tmp || exit 1
rm -rf sheetjs-puppeteer
mkdir sheetjs-puppeteer
cd sheetjs-puppeteer || exit 1
|
|
|
|
|
2024-07-18 22:19:02 +00:00
|
|
|
# Install the SheetJS tarball and the pinned Puppeteer release into the
# scratch project.
npm install --save \
  https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz \
  puppeteer@22.12.0
|
2024-06-25 07:16:41 +00:00
|
|
|
|
|
|
|
# Generate the Node.js driver script. The heredoc delimiter is quoted so the
# shell copies the JavaScript verbatim and performs no expansion inside it.
cat >SheetJSPuppeteer.js <<'EOF'
const fs = require("fs");
const puppeteer = require('puppeteer');

(async () => {
  /* (1) Load the target page */
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    /* forward browser console output to the Node.js console */
    page.on("console", msg => console.log("PAGE LOG:", msg.text()));
    await page.setViewport({width: 1920, height: 1080});
    await page.goto('https://sheetjs.com/demos/table');

    /* (2) Load the standalone SheetJS build from the CDN */
    await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js' });

    /* (3) Run the snippet in browser and return data */
    const bin = await page.evaluate(() => {
      /* find first table */
      const table = document.body.getElementsByTagName('table')[0];
      if (!table) throw new Error("No <table> element found on the page");

      /* call table_to_book on first table */
      const wb = XLSX.utils.table_to_book(table);

      /* generate XLSB and return binary string */
      return XLSX.write(wb, {type: "binary", bookType: "xlsb"});
    });

    /* (4) write data to file */
    fs.writeFileSync("SheetJSPuppeteer.xlsb", bin, { encoding: "binary" });
  } finally {
    /* always shut the headless browser down, even if a step above threw */
    await browser.close();
  }
})().catch(err => {
  /* report the failure and signal it to the calling shell */
  console.error(err);
  process.exitCode = 1;
});
EOF
|
|
|
|
|
|
|
|
# Run the export, then print the generated workbook. The viewer only runs when
# the export succeeded, so a failure is not masked by a missing-file error.
node SheetJSPuppeteer.js &&
  npx -y xlsx-cli SheetJSPuppeteer.xlsb
|