Writing files with adobe extendscript #986

Closed
opened 2018-02-07 14:26:29 +00:00 by grefel · 4 comments
grefel commented 2018-02-07 14:26:29 +00:00 (Migrated from github.com)

I get js-xlsx to read XLSX Files as shown in the demo https://github.com/SheetJS/js-xlsx/tree/master/demos/extendscript. Im struggling with writing a workbook to disk.

Adding the following lines to https://github.com/SheetJS/js-xlsx/blob/master/demos/extendscript/test.jsx

var binaryString = XLSX.write(workbook, {bookType:'xlsx', bookSST:true, type: 'binary'});
var outFile = File (Folder.desktop + "/test.xlsx");
outFile.encoding = "BINARY";
outFile.open( "w" );
outFile.write (binaryString);
outFile.close ();

brings the not so usable error

grafik

Trying to hunt this down I added

#include "../../jszip.js";
#include "../../xlsx.flow.js";

instead of xlsx.core.min.js

This results in some minor Regex Escaping Bugs of Extendscript I fixed here 24d00a02f2.

But the resulting File ist not a usable ZIP-Arhive:

grafik

Any help would be greatly appreciated.

This is related to #603

I get js-xlsx to read XLSX Files as shown in the demo https://github.com/SheetJS/js-xlsx/tree/master/demos/extendscript. Im struggling with writing a workbook to disk. Adding the following lines to https://github.com/SheetJS/js-xlsx/blob/master/demos/extendscript/test.jsx `var binaryString = XLSX.write(workbook, {bookType:'xlsx', bookSST:true, type: 'binary'});` `var outFile = File (Folder.desktop + "/test.xlsx");` `outFile.encoding = "BINARY";` `outFile.open( "w" );` `outFile.write (binaryString);` `outFile.close ();` brings the not so usable error ![grafik](https://user-images.githubusercontent.com/1178574/35920927-95d5d2d0-0c19-11e8-931f-15a10d1a4600.png) Trying to hunt this down I added `#include "../../jszip.js";` `#include "../../xlsx.flow.js";` instead of `xlsx.core.min.js` This results in some minor Regex Escaping Bugs of Extendscript I fixed here https://github.com/grefel/js-xlsx/commit/24d00a02f28da833f984c369796c973e53cc4ac0. But the resulting File ist not a usable ZIP-Arhive: ![grafik](https://user-images.githubusercontent.com/1178574/35921438-dd936ba4-0c1a-11e8-9c18-84fbbfcaa526.png) Any help would be greatly appreciated. This is related to #603
SheetJSDev commented 2018-02-07 17:13:03 +00:00 (Migrated from github.com)

Summary:

  • some regular expressions had unescaped / in character classes (you already noticed these)
  • some unparenthesized boolean logic, where extend script was parsing the result differently (you identified a few, but there were a few others)
  • JSZip utf8 name conversion had unparenthesized bit operations. (this is the root of the current problem)
  • ExtendScript seems to have trouble with array named keys (affects XLS and XLSB)

For XLSX conversion the following fixes resolved the problem:

diff --git a/bits/40_harb.js b/bits/40_harb.js
index bb7b0db..b8ff220 100644
--- a/bits/40_harb.js
+++ b/bits/40_harb.js
@@ -460,7 +460,7 @@ var SYLK = (function() {
 			for(var C = r.s.c; C <= r.e.c; ++C) {
 				var coord = encode_cell({r:R,c:C});
 				cell = dense ? (ws[R]||[])[C]: ws[coord];
-				if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
+				if(!cell || (cell.v == null && (!cell.f || cell.F))) continue;
 				o.push(write_ws_cell_sylk(cell, ws, R, C, opts));
 			}
 		}
diff --git a/bits/59_vba.js b/bits/59_vba.js
index 6ffe2e3..3f69886 100644
--- a/bits/59_vba.js
+++ b/bits/59_vba.js
@@ -3,7 +3,7 @@ function make_vba_xls(cfb/*:CFBContainer*/) {
 	var newcfb = CFB.utils.cfb_new({root:"R"});
 	cfb.FullPaths.forEach(function(p, i) {
 		if(p.slice(-1) === "/" || !p.match(/_VBA_PROJECT_CUR/)) return;
-		var newpath = p.replace(/^[^/]*/,"R").replace(/\/_VBA_PROJECT_CUR\u0000*/, "");
+		var newpath = p.replace(/^[^\/]*/,"R").replace(/\/_VBA_PROJECT_CUR\u0000*/, "");
 		CFB.utils.cfb_add(newcfb, newpath, cfb.FileIndex[i].content);
 	});
 	return CFB.write(newcfb);
@@ -12,7 +12,7 @@ function make_vba_xls(cfb/*:CFBContainer*/) {
 function fill_vba_xls(cfb/*:CFBContainer*/, vba/*:CFBContainer*/)/*:void*/ {
 	vba.FullPaths.forEach(function(p, i) {
 		if(i == 0) return;
-		var newpath = p.replace(/[^/]*[/]/, "/_VBA_PROJECT_CUR/");
+		var newpath = p.replace(/[^\/]*[\/]/, "/_VBA_PROJECT_CUR/");
 		if(newpath.slice(-1) !== "/") CFB.utils.cfb_add(cfb, newpath, vba.FileIndex[i].content);
 	});
 }
diff --git a/bits/67_wsxml.js b/bits/67_wsxml.js
index fdbc5fc..1337c38 100644
--- a/bits/67_wsxml.js
+++ b/bits/67_wsxml.js
@@ -429,7 +429,7 @@ function write_ws_xml_data(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbook
 			if(_cell === undefined) continue;
 			if((cell = write_ws_xml_cell(_cell, ref, ws, opts, idx, wb)) != null) r.push(cell);
 		}
-		if(r.length > 0 || rows && rows[R]) {
+		if(r.length > 0 || (rows && rows[R])) {
 			params = ({r:rr}/*:any*/);
 			if(rows && rows[R]) {
 				row = rows[R];
diff --git a/bits/75_xlml.js b/bits/75_xlml.js
index 11c4218..a428536 100644
--- a/bits/75_xlml.js
+++ b/bits/75_xlml.js
@@ -1013,7 +1013,7 @@ function write_ws_xlml_comment(comments/*:Array<any>*/)/*:string*/ {
 	}).join("");
 }
 function write_ws_xlml_cell(cell, ref/*:string*/, ws, opts, idx/*:number*/, wb, addr)/*:string*/{
-	if(!cell || cell.v == undefined && cell.f == undefined) return "";
+	if(!cell || (cell.v == undefined && cell.f == undefined)) return "";
 
 	var attr = {};
 	if(cell.f) attr["ss:Formula"] = "=" + escapexml(a1_to_rc(cell.f, addr));
diff --git a/jszip.js b/jszip.js
index 21da89f..948d9de 100644
--- a/jszip.js
+++ b/jszip.js
@@ -1624,14 +1624,14 @@ var string2buf = function (str) {
     // count binary size
     for (m_pos = 0; m_pos < str_len; m_pos++) {
         c = str.charCodeAt(m_pos);
-        if ((c & 0xfc00) === 0xd800 && (m_pos+1 < str_len)) {
+        if (((c & 0xfc00) === 0xd800) && (m_pos+1 < str_len)) {
             c2 = str.charCodeAt(m_pos+1);
             if ((c2 & 0xfc00) === 0xdc00) {
                 c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00);
                 m_pos++;
             }
         }
-        buf_len += c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4;
+        buf_len += (c < 0x80) ? 1 : ((c < 0x800) ? 2 : ((c < 0x10000) ? 3 : 4));
     }
 
     // allocate buffer
@@ -1661,13 +1661,13 @@ var string2buf = function (str) {
         } else if (c < 0x10000) {
             /* three bytes */
             buf[i++] = 0xE0 | (c >>> 12);
-            buf[i++] = 0x80 | (c >>> 6 & 0x3f);
+            buf[i++] = 0x80 | ((c >>> 6) & 0x3f);
             buf[i++] = 0x80 | (c & 0x3f);
         } else {
             /* four bytes */
             buf[i++] = 0xf0 | (c >>> 18);
-            buf[i++] = 0x80 | (c >>> 12 & 0x3f);
-            buf[i++] = 0x80 | (c >>> 6 & 0x3f);
+            buf[i++] = 0x80 | ((c >>> 12) & 0x3f);
+            buf[i++] = 0x80 | ((c >>> 6) & 0x3f);
             buf[i++] = 0x80 | (c & 0x3f);
         }
     }

I am not quite sure why the minified version is causing issues, but it looks related to where the newline appears in a switch block. Might have to change how newlines are inserted.

You can test with the following script:

var thisFile = new File($.fileName);  
var basePath = thisFile.path;  

#include "shim.js";
#include "jszip.js";
#include "xlsx.js";

var filename = "/sheetjs.xlsx";

/* Read file from disk */
var infile = File(basePath+filename);
infile.open("r");
infile.encoding = "binary";
var data = infile.read();

/* Parse file */
var workbook = XLSX.read(data, {type:"binary"});

/* Display first worksheet */
var first_sheet_name = workbook.SheetNames[0], first_worksheet = workbook.Sheets[first_sheet_name];
var data = XLSX.utils.sheet_to_json(first_worksheet, {header:1});
alert(data);

var outfmts = [

  ["xlml",  "test.xml.xls"],
  ["fods",  "test.fods"],
  ["csv",   "test.csv"],
  ["txt",   "test.txt"],
  ["slk",   "test.slk"],
  ["eth",   "test.eth"],
  ["htm",   "test.htm"],
  ["dif",   "test.dif"],
  ["ods",   "test.ods"],
/*
  ["xlsb",  "test.xlsb"],
  ["biff8", "test.biff8.xls"],
  ["biff5", "test.biff5.xls"],
  ["biff2", "test.biff2.xls"],
*/
  ["xlsx",  "test.xlsx"]
];
for(var i = 0; i < outfmts.length; ++i) {
  alert(outfmts[i][0]);
  /* Generate new file */
  var wbout = XLSX.write(workbook, {bookType:outfmts[i][0], bookSST:true, type:'binary', cellDates:true});

  /* Roundtrip and Display first worksheet */
  var wb = XLSX.read(wbout, {type:"binary"});
  var f_sheet_name = wb.SheetNames[0], f_worksheet = wb.Sheets[f_sheet_name];
  var data = XLSX.utils.sheet_to_json(f_worksheet, {header:1, cellDates:true});
  alert(data);

  /* Write file to disk */
  var outFile = File(basePath + "/" + outfmts[i][1]);
  outFile.open("w");
  outFile.encoding = "binary";
  outFile.write(wbout);
  outFile.close();
}

(As you probably saw, the current tests only try reading from XLSX, so none of the write code paths and none of the non-XLSX read paths have been tested in ExtendScript. We'll close this issue once the full roundtrip tests pass for all of the supported formats)

Summary: - some regular expressions had unescaped `/` in character classes (you already noticed these) - some unparenthesized boolean logic, where extend script was parsing the result differently (you identified a few, but there were a few others) - JSZip utf8 name conversion had unparenthesized bit operations. (this is the root of the current problem) - ExtendScript seems to have trouble with array named keys (affects XLS and XLSB) For XLSX conversion the following fixes resolved the problem: ```diff diff --git a/bits/40_harb.js b/bits/40_harb.js index bb7b0db..b8ff220 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -460,7 +460,7 @@ var SYLK = (function() { for(var C = r.s.c; C <= r.e.c; ++C) { var coord = encode_cell({r:R,c:C}); cell = dense ? (ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; + if(!cell || (cell.v == null && (!cell.f || cell.F))) continue; o.push(write_ws_cell_sylk(cell, ws, R, C, opts)); } } diff --git a/bits/59_vba.js b/bits/59_vba.js index 6ffe2e3..3f69886 100644 --- a/bits/59_vba.js +++ b/bits/59_vba.js @@ -3,7 +3,7 @@ function make_vba_xls(cfb/*:CFBContainer*/) { var newcfb = CFB.utils.cfb_new({root:"R"}); cfb.FullPaths.forEach(function(p, i) { if(p.slice(-1) === "/" || !p.match(/_VBA_PROJECT_CUR/)) return; - var newpath = p.replace(/^[^/]*/,"R").replace(/\/_VBA_PROJECT_CUR\u0000*/, ""); + var newpath = p.replace(/^[^\/]*/,"R").replace(/\/_VBA_PROJECT_CUR\u0000*/, ""); CFB.utils.cfb_add(newcfb, newpath, cfb.FileIndex[i].content); }); return CFB.write(newcfb); @@ -12,7 +12,7 @@ function make_vba_xls(cfb/*:CFBContainer*/) { function fill_vba_xls(cfb/*:CFBContainer*/, vba/*:CFBContainer*/)/*:void*/ { vba.FullPaths.forEach(function(p, i) { if(i == 0) return; - var newpath = p.replace(/[^/]*[/]/, "/_VBA_PROJECT_CUR/"); + var newpath = p.replace(/[^\/]*[\/]/, "/_VBA_PROJECT_CUR/"); if(newpath.slice(-1) !== "/") CFB.utils.cfb_add(cfb, newpath, vba.FileIndex[i].content); }); } diff --git a/bits/67_wsxml.js b/bits/67_wsxml.js index fdbc5fc..1337c38 100644 --- a/bits/67_wsxml.js +++ b/bits/67_wsxml.js @@ -429,7 +429,7 @@ function write_ws_xml_data(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbook if(_cell === undefined) continue; if((cell = write_ws_xml_cell(_cell, ref, ws, opts, idx, wb)) != null) r.push(cell); } - if(r.length > 0 || rows && rows[R]) { + if(r.length > 0 || (rows && rows[R])) { params = ({r:rr}/*:any*/); if(rows && rows[R]) { row = rows[R]; diff --git a/bits/75_xlml.js b/bits/75_xlml.js index 11c4218..a428536 100644 --- a/bits/75_xlml.js +++ b/bits/75_xlml.js @@ -1013,7 +1013,7 @@ function write_ws_xlml_comment(comments/*:Array<any>*/)/*:string*/ { }).join(""); } function write_ws_xlml_cell(cell, ref/*:string*/, ws, opts, idx/*:number*/, wb, addr)/*:string*/{ - if(!cell || cell.v == undefined && cell.f == undefined) return ""; + if(!cell || (cell.v == undefined && cell.f == undefined)) return ""; var attr = {}; if(cell.f) attr["ss:Formula"] = "=" + escapexml(a1_to_rc(cell.f, addr)); diff --git a/jszip.js b/jszip.js index 21da89f..948d9de 100644 --- a/jszip.js +++ b/jszip.js @@ -1624,14 +1624,14 @@ var string2buf = function (str) { // count binary size for (m_pos = 0; m_pos < str_len; m_pos++) { c = str.charCodeAt(m_pos); - if ((c & 0xfc00) === 0xd800 && (m_pos+1 < str_len)) { + if (((c & 0xfc00) === 0xd800) && (m_pos+1 < str_len)) { c2 = str.charCodeAt(m_pos+1); if ((c2 & 0xfc00) === 0xdc00) { c = 0x10000 + ((c - 0xd800) << 10) + (c2 - 0xdc00); m_pos++; } } - buf_len += c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : 4; + buf_len += (c < 0x80) ? 1 : ((c < 0x800) ? 2 : ((c < 0x10000) ? 3 : 4)); } // allocate buffer @@ -1661,13 +1661,13 @@ var string2buf = function (str) { } else if (c < 0x10000) { /* three bytes */ buf[i++] = 0xE0 | (c >>> 12); - buf[i++] = 0x80 | (c >>> 6 & 0x3f); + buf[i++] = 0x80 | ((c >>> 6) & 0x3f); buf[i++] = 0x80 | (c & 0x3f); } else { /* four bytes */ buf[i++] = 0xf0 | (c >>> 18); - buf[i++] = 0x80 | (c >>> 12 & 0x3f); - buf[i++] = 0x80 | (c >>> 6 & 0x3f); + buf[i++] = 0x80 | ((c >>> 12) & 0x3f); + buf[i++] = 0x80 | ((c >>> 6) & 0x3f); buf[i++] = 0x80 | (c & 0x3f); } } ``` I am not quite sure why the minified version is causing issues, but it looks related to where the newline appears in a switch block. Might have to change how newlines are inserted. You can test with the following script: ```javascript var thisFile = new File($.fileName); var basePath = thisFile.path; #include "shim.js"; #include "jszip.js"; #include "xlsx.js"; var filename = "/sheetjs.xlsx"; /* Read file from disk */ var infile = File(basePath+filename); infile.open("r"); infile.encoding = "binary"; var data = infile.read(); /* Parse file */ var workbook = XLSX.read(data, {type:"binary"}); /* Display first worksheet */ var first_sheet_name = workbook.SheetNames[0], first_worksheet = workbook.Sheets[first_sheet_name]; var data = XLSX.utils.sheet_to_json(first_worksheet, {header:1}); alert(data); var outfmts = [ ["xlml", "test.xml.xls"], ["fods", "test.fods"], ["csv", "test.csv"], ["txt", "test.txt"], ["slk", "test.slk"], ["eth", "test.eth"], ["htm", "test.htm"], ["dif", "test.dif"], ["ods", "test.ods"], /* ["xlsb", "test.xlsb"], ["biff8", "test.biff8.xls"], ["biff5", "test.biff5.xls"], ["biff2", "test.biff2.xls"], */ ["xlsx", "test.xlsx"] ]; for(var i = 0; i < outfmts.length; ++i) { alert(outfmts[i][0]); /* Generate new file */ var wbout = XLSX.write(workbook, {bookType:outfmts[i][0], bookSST:true, type:'binary', cellDates:true}); /* Roundtrip and Display first worksheet */ var wb = XLSX.read(wbout, {type:"binary"}); var f_sheet_name = wb.SheetNames[0], f_worksheet = wb.Sheets[f_sheet_name]; var data = XLSX.utils.sheet_to_json(f_worksheet, {header:1, cellDates:true}); alert(data); /* Write file to disk */ var outFile = File(basePath + "/" + outfmts[i][1]); outFile.open("w"); outFile.encoding = "binary"; outFile.write(wbout); outFile.close(); } ``` (As you probably saw, the current tests only try reading from XLSX, so none of the write code paths and none of the non-XLSX read paths have been tested in ExtendScript. We'll close this issue once the full roundtrip tests pass for all of the supported formats)
grefel commented 2018-02-08 11:08:37 +00:00 (Migrated from github.com)

Wow, that was quick. Thanks for looking into it.

Your suggested patch works for me.

Textformats are looking great. XLSX and ODS are working as well!

Regarding the minified version: I encounter these sort of problems often in ExtendScript and minified JS. I think most ExtendScript users are happy without a minified version, beacuse these scripts are not delivered throught HTTP rather saved into the Scripts Folder of the application on a harddisk.

Wow, that was quick. **Thanks** for looking into it. Your suggested patch works for me. Textformats are looking great. XLSX and ODS are working as well! Regarding the **minified version**: I encounter these sort of problems often in ExtendScript and minified JS. I think most ExtendScript users are happy without a minified version, beacuse these scripts are not delivered throught HTTP rather saved into the Scripts Folder of the application on a harddisk.
SheetJSDev commented 2018-02-08 19:22:00 +00:00 (Migrated from github.com)

There's a new xlsx.extendscript.js amalgamation which includes everything. readFile and writeFile now support the ExtendScript environment. See the demo for more details.

There's a new [`xlsx.extendscript.js`](https://github.com/SheetJS/js-xlsx/blob/master/dist/xlsx.extendscript.js) amalgamation which includes everything. `readFile` and `writeFile` now support the ExtendScript environment. See [the demo](https://github.com/SheetJS/js-xlsx/tree/master/demos/extendscript) for more details.
grefel commented 2018-02-08 20:16:18 +00:00 (Migrated from github.com)

This is great. Thank you!

This is great. Thank you!
Sign in to join this conversation.
No Milestone
No Assignees
1 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: sheetjs/sheetjs#986
No description provided.