/* forked from sheetjs/docs.sheetjs.com · JavaScript · 24 lines · 1.3 KiB */
const XLSX = require("xlsx");
const { readFileSync } = require("fs");
const cheerio = require("cheerio");

/* obtain HTML string. This example reads from SheetJSTable.html */
const html_str = readFileSync("SheetJSTable.html", "utf8");

/* get first TABLE element */
const $ = cheerio.load(html_str);
const doc = $("TABLE").first()[0];

/* fail with a readable message instead of a TypeError inside the shims below */
if (!doc) {
  throw new Error("SheetJSTable.html does not contain a TABLE element");
}

/*
 * FIX THE CHEERIO LIBRARY
 *
 * The members below use the names of the browser DOM API (tagName, rows,
 * cells, ownerDocument, hasAttribute, getAttribute, innerHTML,
 * getElementsByTagName). They are installed on the prototype of the cheerio
 * node, so every node sharing that prototype gets them, and the node can be
 * handed to XLSX.utils.table_to_book.
 */
const proto = Object.getPrototypeOf(doc);

/* upper-cased element name */
Object.defineProperty(proto, "tagName", {
  get() {
    /* look only at own properties: reading this.tagName here would re-enter this getter */
    const own = Object.entries(this).find(([key]) => key === "tagName" || key === "name");
    return own[1].toUpperCase();
  },
});

/* TR elements of the table: direct children plus those inside THEAD / TBODY / TFOOT */
Object.defineProperty(proto, "rows", {
  get() {
    const table = $(this);
    const sectionRows = table.children("thead, tbody, tfoot").children("tr");
    return table.children("tr").add(sectionRows);
  },
});

/* TD and TH children of a row */
Object.defineProperty(proto, "cells", {
  get() {
    return $(this).children("td, th");
  },
});

/* placeholder object: there is no real document behind a cheerio node */
Object.defineProperty(proto, "ownerDocument", {
  get() {
    return {};
  },
});

/* true when the attribute is present on the node itself */
proto.hasAttribute = function (name) {
  return Object.prototype.hasOwnProperty.call(this.attribs, name);
};

/* raw attribute value (undefined when the attribute is absent) */
proto.getAttribute = function (name) {
  return this.attribs[name];
};

/* serialized markup of the node's contents */
Object.defineProperty(proto, "innerHTML", {
  get() {
    return $(this).prop("innerHTML");
  },
});

/* the DOM method matches every descendant, not only direct children */
proto.getElementsByTagName = function (name) {
  return $(this).find(name);
};

/* generate workbook */
const workbook = XLSX.utils.table_to_book(doc);
XLSX.writeFile(workbook, "SheetJSCheerio.xlsx");