version bump 0.9.9: basic TXT/CSV read

fixes #489 h/t @vijayst
fixes #617 h/t @ayush000
This commit is contained in:
SheetJS 2017-04-03 02:02:02 -04:00
parent b9dae134f2
commit 01d1c32fa1
21 changed files with 869 additions and 179 deletions

@ -179,7 +179,7 @@ readme: README.md ## Update README Table of Contents
markdown-toc -i README.md
.PHONY: book
book: readme ## Update summary for documentation
book: readme graph ## Update summary for documentation
printf "# Summary\n\n- [xlsx](README.md#xlsx)\n" > misc/docs/SUMMARY.md
markdown-toc README.md | sed 's/(#/(README.md#/g'>> misc/docs/SUMMARY.md

@ -887,6 +887,10 @@ Plaintext format guessing follows the priority order:
| Format | Test |
|:-------|:--------------------------------------------------------------------|
| XML | starts with < |
| DSV | starts with `/sep=.$/`, separator is the specified character |
| TSV | one of the first 1024 characters is a tab char `"\t"` |
| CSV | one of the first 1024 characters is a comma char `","` |
| PRN | (default) |
## Writing Options
@ -1167,11 +1171,11 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
| Excel 3.0 (XLS BIFF3) | :o: | |
| Excel 2.0/2.1 (XLS BIFF2) | :o: | :o: |
| **Excel Supported Text Formats** |:-----:|:-----:|
| Delimiter-Separated Values (CSV/TXT) | | :o: |
| Delimiter-Separated Values (CSV/TXT) | :o: | :o: |
| Data Interchange Format (DIF) | :o: | :o: |
| Symbolic Link (SYLK/SLK) | :o: | :o: |
| Lotus Formatted Text (PRN) | :o: | :o: |
| UTF-16 Unicode Text (TXT) | | :o: |
| UTF-16 Unicode Text (TXT) | :o: | :o: |
| **Other Workbook/Worksheet Formats** |:-----:|:-----:|
| OpenDocument Spreadsheet (ODS) | :o: | :o: |
| Flat XML ODF Spreadsheet (FODS) | :o: | :o: |
@ -1243,7 +1247,7 @@ lies in the names of tags and attributes.
Excel CSV deviates from RFC4180 in a number of important ways. The generated
CSV files should generally work in Excel although they may not work in RFC4180
compatible readers.
compatible readers. The parser should generally understand Excel CSV.
Excel TXT uses tab as the delimiter and codepage 1200.

@ -1 +1 @@
XLSX.version = '0.9.8';
XLSX.version = '0.9.9';

@ -369,16 +369,6 @@ var DIF = (function() {
})();
var PRN = (function() {
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_aoa_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_aoa(d/*:RawData*/, opts)/*:AOA*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_aoa_str(text, opts);
}
function set_text_arr(data/*:string*/, arr/*:AOA*/, R/*:number*/, C/*:number*/) {
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
@ -411,7 +401,54 @@ var PRN = (function() {
return arr;
}
/* Build a worksheet: prn_to_aoa converts the input first, aoa_to_sheet turns the result into a sheet. */
function prn_to_sheet(str/*:string*/, opts)/*:Worksheet*/ {
	var rows = prn_to_aoa(str, opts);
	return aoa_to_sheet(rows, opts);
}
/*
 * Parse delimiter-separated text into a worksheet object.
 *
 * Separator: a leading "sep=X" line (terminated by LF or CRLF) selects X and is
 * removed from the data; otherwise tab is used when one appears in the first
 * 1024 characters and comma is used when none does.
 *
 * Records end at LF; a CR immediately before the LF is dropped. Separators and
 * newlines between double quotes do not split fields. The final field is
 * emitted even when the text does not end with a newline.
 *
 * Cell typing, in order: leading "=" -> formula (t:'n', f: text after "=");
 * "TRUE"/"FALSE" -> boolean; a field that is numeric in its entirety -> number
 * (w keeps the original text); anything else -> string with one surrounding
 * pair of quotes removed and doubled quotes collapsed.
 *
 * opts is accepted for signature parity with the other parsers and is not read.
 * Returns the worksheet with '!ref' set from the populated range.
 */
function dsv_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
	var sep = "";
	var ws/*:Worksheet*/ = ({}/*:any*/);
	var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:0}}/*:any*/);

	/* known sep */
	if(str.substr(0,4) == "sep=") {
		if(str.charCodeAt(5) == 0x0A) { sep = str.charAt(4); str = str.substr(6); }
		else if(str.charCodeAt(5) == 0x0D && str.charCodeAt(6) == 0x0A) { sep = str.charAt(4); str = str.substr(7); }
	}
	/* TODO: actually determine the separator */
	/* only guess when the header did not name one, otherwise the header would be ignored */
	if(sep === "") sep = str.substr(0,1024).indexOf("\t") == -1 ? "," : "\t";

	var R = 0, C = 0;
	var start = 0, end = 0, stop = 0, sepcc = sep.charCodeAt(0), instr = false, cc = 0;

	/* store the field str[start, last) at the current row/column and grow the range */
	function finish_cell(last/*:number*/) {
		var s = str.slice(start, last);
		var v = 0;
		var cell = ({}/*:any*/);
		if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
		else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
		else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
		/* parseFloat alone accepts numeric prefixes ("12/25/2016" -> 12); require Number() to agree */
		else if(!isNaN(v = parseFloat(s)) && Number(s) === v) { cell.t = 'n'; cell.w = s; cell.v = v; }
		else { cell.t = 's'; cell.v = s.replace(/^"/,"").replace(/"$/,"").replace(/""/g,'"'); }
		ws[encode_cell({c:C,r:R})] = cell;
		if(range.e.c < C) range.e.c = C;
		if(range.e.r < R) range.e.r = R;
	}

	for(;end < str.length;++end) {
		cc = str.charCodeAt(end);
		if(cc == 0x22) { instr = !instr; continue; }
		if(instr || (cc != sepcc && cc != 0x0A)) continue;
		/* CRLF: keep the CR out of the field text */
		stop = (cc == 0x0A && end > start && str.charCodeAt(end - 1) == 0x0D) ? end - 1 : end;
		finish_cell(stop);
		start = end + 1;
		if(cc == sepcc) ++C; else { C = 0; ++R; }
	}
	/* text without a trailing newline still has one pending field */
	if(start < str.length) finish_cell(str.length);

	ws['!ref'] = encode_range(range);
	return ws;
}
/* Pick a parser for plaintext: text with a "sep=" header, a tab, or a comma goes to
 * dsv_to_sheet_str; everything else goes through prn_to_aoa_str and aoa_to_sheet. */
function prn_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
	var looks_delimited = str.substr(0,4) == "sep=" || str.indexOf("\t") != -1 || str.indexOf(",") != -1;
	if(!looks_delimited) return aoa_to_sheet(prn_to_aoa_str(str, opts), opts);
	return dsv_to_sheet_str(str, opts);
}
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_sheet_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_sheet_str(text, opts);
}
/* Parse the input into a single worksheet with prn_to_sheet and wrap it via sheet_to_workbook. */
function prn_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
	var sheet = prn_to_sheet(str, opts);
	return sheet_to_workbook(sheet, opts);
}

@ -24,6 +24,15 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_zip(zip, o);
}
/*
 * Read text data that starts with a two-byte byte-order mark (readSync routes
 * FF FE input here).
 *
 * data: raw input; base64 input is decoded first when o.type is 'base64'.
 * o:    parse options. The object is NOT modified: the parsers receive a
 *       shallow copy whose type is "binary", so a caller that reuses its
 *       options object for another read keeps the type it set.
 *
 * The first two code units (the BOM) are skipped and the rest is decoded as
 * codepage 1200. Decoded text starting with "<" goes to parse_xlml; anything
 * else goes to PRN.to_workbook.
 */
function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ {
	var d = data;
	if(o.type == 'base64') d = Base64.decode(d);
	d = cptable.utils.decode(1200, d.slice(2));
	/* copy instead of assigning o.type, which would leak "binary" back to the caller */
	var oo/*:ParseOpts*/ = ({}/*:any*/);
	for(var k in o) if(Object.prototype.hasOwnProperty.call(o, k)) oo[k] = o[k];
	oo.type = "binary";
	if(d.charCodeAt(0) == 0x3C) return parse_xlml(d, oo);
	return PRN.to_workbook(d, oo);
}
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
var zip, d = data, n=[0];
var o = opts||{};
@ -37,11 +46,11 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
case 0x54: if(n[1] == 0x41 && n[2] == 0x42 && n[3] == 0x4C) return DIF.to_workbook(d, o); break;
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
case 0xEF: return parse_xlml(d, o);
case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
/* TODO: CSV / TXT */
return PRN.to_workbook(d, o);
}

26
dist/xlsx.core.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

26
dist/xlsx.full.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

705
dist/xlsx.js vendored

File diff suppressed because it is too large Load Diff

25
dist/xlsx.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.min.map vendored

File diff suppressed because one or more lines are too long

@ -76,5 +76,9 @@ Plaintext format guessing follows the priority order:
| Format | Test |
|:-------|:--------------------------------------------------------------------|
| XML | starts with < |
| DSV | starts with `/sep=.$/`, separator is the specified character |
| TSV | one of the first 1024 characters is a tab char `"\t"` |
| CSV | one of the first 1024 characters is a comma char `","` |
| PRN | (default) |

@ -14,11 +14,11 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
| Excel 3.0 (XLS BIFF3) | :o: | |
| Excel 2.0/2.1 (XLS BIFF2) | :o: | :o: |
| **Excel Supported Text Formats** |:-----:|:-----:|
| Delimiter-Separated Values (CSV/TXT) | | :o: |
| Delimiter-Separated Values (CSV/TXT) | :o: | :o: |
| Data Interchange Format (DIF) | :o: | :o: |
| Symbolic Link (SYLK/SLK) | :o: | :o: |
| Lotus Formatted Text (PRN) | :o: | :o: |
| UTF-16 Unicode Text (TXT) | | :o: |
| UTF-16 Unicode Text (TXT) | :o: | :o: |
| **Other Workbook/Worksheet Formats** |:-----:|:-----:|
| OpenDocument Spreadsheet (ODS) | :o: | :o: |
| Flat XML ODF Spreadsheet (FODS) | :o: | :o: |
@ -90,7 +90,7 @@ lies in the names of tags and attributes.
Excel CSV deviates from RFC4180 in a number of important ways. The generated
CSV files should generally work in Excel although they may not work in RFC4180
compatible readers.
compatible readers. The parser should generally understand Excel CSV.
Excel TXT uses tab as the delimiter and codepage 1200.

@ -13,15 +13,11 @@ digraph G {
xls8 [label="XLS\nBIFF8"];
}
subgraph OD {
node [style=filled,color=yellow];
subgraph OLD {
node [style=filled,color=cyan];
ods [label="ODS"];
fods [label="FODS"];
uos [label="UOS"];
}
subgraph OLD {
node [style=filled,color=cyan];
html [label="HTML\nTable"];
csv [label="CSV"];
txt [label="TXT\nUTF-16"];
@ -59,7 +55,9 @@ digraph G {
dif -> csf
prn -> csf
csf -> prn
csv -> csf
csf -> csv
txt -> csf
csf -> txt
dbf -> csf
html -> csf

Binary file not shown.

Before

Width:  |  Height:  |  Size: 119 KiB

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 45 KiB

@ -7,16 +7,16 @@ digraph G {
label="Supported Format Types"
color="white"
XL[label="Excel",style=filled,color=green];
OD[label="ODF",style=filled,color=yellow];
CSF[label="JS",shape=doublecircle];
OLD[label="Other",style=filled,color=cyan];
{ edge[style=invis] XL -> OD -> OLD[constraint=false]}
{ edge[style=invis] XL -> CSF -> OLD[constraint=false]}
}
subgraph cluster_1 {
label="Workbook Format Conversions (blue arrow)"
color="white"
x1i[label="XLSX"]
c1[shape=doublecircle,label="CSF"];
c1[shape=doublecircle,label="JS"];
x1o[label="XLSB"]
{ edge[color=blue]
x1i->c1[constraint=false,label="read"]
@ -28,7 +28,7 @@ digraph G {
label="Single-Worksheet Format Conversions (green arrow)"
color="white"
x2i[label="SYLK"]
c2[shape=doublecircle,label="CSF"];
c2[shape=doublecircle,label="JS"];
x2o[label="CSV"]
{ edge[color=aquamarine4]
x2i->c2[constraint=false,label="read"]
@ -37,6 +37,6 @@ digraph G {
}
{ edge[style=invis] XL -> x1i -> x2i; }
{ edge[style=invis] OD -> c1 -> c2; }
{ edge[style=invis] CSF -> c1 -> c2; }
{ edge[style=invis] OLD -> x1o -> x2o; }
}

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.9.8",
"version": "0.9.9",
"author": "sheetjs",
"description": "Excel (XLSB/XLSX/XLSM/XLS/XML) and ODS (ODS/FODS/UOS) spreadsheet parser and writer",
"keywords": [ "excel", "xls", "xlsx", "xlsb", "xlsm", "ods", "office", "spreadsheet" ],

@ -5,7 +5,7 @@
/*exported XLSX */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.9.8';
XLSX.version = '0.9.9';
var current_codepage = 1200, current_cptable;
/*:: declare var cptable:any; */
if(typeof module !== "undefined" && typeof require !== 'undefined') {
@ -5129,16 +5129,6 @@ var DIF = (function() {
})();
var PRN = (function() {
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_aoa_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_aoa(d/*:RawData*/, opts)/*:AOA*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_aoa_str(text, opts);
}
function set_text_arr(data/*:string*/, arr/*:AOA*/, R/*:number*/, C/*:number*/) {
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
@ -5171,7 +5161,54 @@ var PRN = (function() {
return arr;
}
/* Build a worksheet: prn_to_aoa converts the input first, aoa_to_sheet turns the result into a sheet. */
function prn_to_sheet(str/*:string*/, opts)/*:Worksheet*/ {
	var rows = prn_to_aoa(str, opts);
	return aoa_to_sheet(rows, opts);
}
/*
 * Parse delimiter-separated text into a worksheet object.
 *
 * Separator: a leading "sep=X" line (terminated by LF or CRLF) selects X and is
 * removed from the data; otherwise tab is used when one appears in the first
 * 1024 characters and comma is used when none does.
 *
 * Records end at LF; a CR immediately before the LF is dropped. Separators and
 * newlines between double quotes do not split fields. The final field is
 * emitted even when the text does not end with a newline.
 *
 * Cell typing, in order: leading "=" -> formula (t:'n', f: text after "=");
 * "TRUE"/"FALSE" -> boolean; a field that is numeric in its entirety -> number
 * (w keeps the original text); anything else -> string with one surrounding
 * pair of quotes removed and doubled quotes collapsed.
 *
 * opts is accepted for signature parity with the other parsers and is not read.
 * Returns the worksheet with '!ref' set from the populated range.
 */
function dsv_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
	var sep = "";
	var ws/*:Worksheet*/ = ({}/*:any*/);
	var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:0}}/*:any*/);

	/* known sep */
	if(str.substr(0,4) == "sep=") {
		if(str.charCodeAt(5) == 0x0A) { sep = str.charAt(4); str = str.substr(6); }
		else if(str.charCodeAt(5) == 0x0D && str.charCodeAt(6) == 0x0A) { sep = str.charAt(4); str = str.substr(7); }
	}
	/* TODO: actually determine the separator */
	/* only guess when the header did not name one, otherwise the header would be ignored */
	if(sep === "") sep = str.substr(0,1024).indexOf("\t") == -1 ? "," : "\t";

	var R = 0, C = 0;
	var start = 0, end = 0, stop = 0, sepcc = sep.charCodeAt(0), instr = false, cc = 0;

	/* store the field str[start, last) at the current row/column and grow the range */
	function finish_cell(last/*:number*/) {
		var s = str.slice(start, last);
		var v = 0;
		var cell = ({}/*:any*/);
		if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
		else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
		else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
		/* parseFloat alone accepts numeric prefixes ("12/25/2016" -> 12); require Number() to agree */
		else if(!isNaN(v = parseFloat(s)) && Number(s) === v) { cell.t = 'n'; cell.w = s; cell.v = v; }
		else { cell.t = 's'; cell.v = s.replace(/^"/,"").replace(/"$/,"").replace(/""/g,'"'); }
		ws[encode_cell({c:C,r:R})] = cell;
		if(range.e.c < C) range.e.c = C;
		if(range.e.r < R) range.e.r = R;
	}

	for(;end < str.length;++end) {
		cc = str.charCodeAt(end);
		if(cc == 0x22) { instr = !instr; continue; }
		if(instr || (cc != sepcc && cc != 0x0A)) continue;
		/* CRLF: keep the CR out of the field text */
		stop = (cc == 0x0A && end > start && str.charCodeAt(end - 1) == 0x0D) ? end - 1 : end;
		finish_cell(stop);
		start = end + 1;
		if(cc == sepcc) ++C; else { C = 0; ++R; }
	}
	/* text without a trailing newline still has one pending field */
	if(start < str.length) finish_cell(str.length);

	ws['!ref'] = encode_range(range);
	return ws;
}
/* Pick a parser for plaintext: text with a "sep=" header, a tab, or a comma goes to
 * dsv_to_sheet_str; everything else goes through prn_to_aoa_str and aoa_to_sheet. */
function prn_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
	var looks_delimited = str.substr(0,4) == "sep=" || str.indexOf("\t") != -1 || str.indexOf(",") != -1;
	if(!looks_delimited) return aoa_to_sheet(prn_to_aoa_str(str, opts), opts);
	return dsv_to_sheet_str(str, opts);
}
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_sheet_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_sheet_str(text, opts);
}
/* Parse the input into a single worksheet with prn_to_sheet and wrap it via sheet_to_workbook. */
function prn_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
	var sheet = prn_to_sheet(str, opts);
	return sheet_to_workbook(sheet, opts);
}
@ -15064,6 +15101,15 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_zip(zip, o);
}
/*
 * Read text data that starts with a two-byte byte-order mark (readSync routes
 * FF FE input here).
 *
 * data: raw input; base64 input is decoded first when o.type is 'base64'.
 * o:    parse options. The object is NOT modified: the parsers receive a
 *       shallow copy whose type is "binary", so a caller that reuses its
 *       options object for another read keeps the type it set.
 *
 * The first two code units (the BOM) are skipped and the rest is decoded as
 * codepage 1200. Decoded text starting with "<" goes to parse_xlml; anything
 * else goes to PRN.to_workbook.
 */
function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ {
	var d = data;
	if(o.type == 'base64') d = Base64.decode(d);
	d = cptable.utils.decode(1200, d.slice(2));
	/* copy instead of assigning o.type, which would leak "binary" back to the caller */
	var oo/*:ParseOpts*/ = ({}/*:any*/);
	for(var k in o) if(Object.prototype.hasOwnProperty.call(o, k)) oo[k] = o[k];
	oo.type = "binary";
	if(d.charCodeAt(0) == 0x3C) return parse_xlml(d, oo);
	return PRN.to_workbook(d, oo);
}
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
var zip, d = data, n=[0];
var o = opts||{};
@ -15077,11 +15123,11 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
case 0x54: if(n[1] == 0x41 && n[2] == 0x42 && n[3] == 0x4C) return DIF.to_workbook(d, o); break;
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
case 0xEF: return parse_xlml(d, o);
case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
/* TODO: CSV / TXT */
return PRN.to_workbook(d, o);
}

72
xlsx.js

@ -5,7 +5,7 @@
/*exported XLSX */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.9.8';
XLSX.version = '0.9.9';
var current_codepage = 1200, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel.js');
@ -5073,16 +5073,6 @@ var DIF = (function() {
})();
var PRN = (function() {
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_aoa_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_aoa(d, opts) {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_aoa_str(text, opts);
}
function set_text_arr(data, arr, R, C) {
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
@ -5115,7 +5105,54 @@ var PRN = (function() {
return arr;
}
/* Build a worksheet: prn_to_aoa converts the input first, aoa_to_sheet turns the result into a sheet. */
function prn_to_sheet(str, opts) {
	var rows = prn_to_aoa(str, opts);
	return aoa_to_sheet(rows, opts);
}
/*
 * Parse delimiter-separated text into a worksheet object.
 *
 * Separator: a leading "sep=X" line (terminated by LF or CRLF) selects X and is
 * removed from the data; otherwise tab is used when one appears in the first
 * 1024 characters and comma is used when none does.
 *
 * Records end at LF; a CR immediately before the LF is dropped. Separators and
 * newlines between double quotes do not split fields. The final field is
 * emitted even when the text does not end with a newline.
 *
 * Cell typing, in order: leading "=" -> formula (t:'n', f: text after "=");
 * "TRUE"/"FALSE" -> boolean; a field that is numeric in its entirety -> number
 * (w keeps the original text); anything else -> string with one surrounding
 * pair of quotes removed and doubled quotes collapsed.
 *
 * opts is accepted for signature parity with the other parsers and is not read.
 * Returns the worksheet with '!ref' set from the populated range.
 */
function dsv_to_sheet_str(str, opts) {
	var sep = "";
	var ws = ({});
	var range = ({s: {c:0, r:0}, e: {c:0, r:0}});

	/* known sep */
	if(str.substr(0,4) == "sep=") {
		if(str.charCodeAt(5) == 0x0A) { sep = str.charAt(4); str = str.substr(6); }
		else if(str.charCodeAt(5) == 0x0D && str.charCodeAt(6) == 0x0A) { sep = str.charAt(4); str = str.substr(7); }
	}
	/* TODO: actually determine the separator */
	/* only guess when the header did not name one, otherwise the header would be ignored */
	if(sep === "") sep = str.substr(0,1024).indexOf("\t") == -1 ? "," : "\t";

	var R = 0, C = 0;
	var start = 0, end = 0, stop = 0, sepcc = sep.charCodeAt(0), instr = false, cc = 0;

	/* store the field str[start, last) at the current row/column and grow the range */
	function finish_cell(last) {
		var s = str.slice(start, last);
		var v = 0;
		var cell = ({});
		if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
		else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
		else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
		/* parseFloat alone accepts numeric prefixes ("12/25/2016" -> 12); require Number() to agree */
		else if(!isNaN(v = parseFloat(s)) && Number(s) === v) { cell.t = 'n'; cell.w = s; cell.v = v; }
		else { cell.t = 's'; cell.v = s.replace(/^"/,"").replace(/"$/,"").replace(/""/g,'"'); }
		ws[encode_cell({c:C,r:R})] = cell;
		if(range.e.c < C) range.e.c = C;
		if(range.e.r < R) range.e.r = R;
	}

	for(;end < str.length;++end) {
		cc = str.charCodeAt(end);
		if(cc == 0x22) { instr = !instr; continue; }
		if(instr || (cc != sepcc && cc != 0x0A)) continue;
		/* CRLF: keep the CR out of the field text */
		stop = (cc == 0x0A && end > start && str.charCodeAt(end - 1) == 0x0D) ? end - 1 : end;
		finish_cell(stop);
		start = end + 1;
		if(cc == sepcc) ++C; else { C = 0; ++R; }
	}
	/* text without a trailing newline still has one pending field */
	if(start < str.length) finish_cell(str.length);

	ws['!ref'] = encode_range(range);
	return ws;
}
/* Pick a parser for plaintext: text with a "sep=" header, a tab, or a comma goes to
 * dsv_to_sheet_str; everything else goes through prn_to_aoa_str and aoa_to_sheet. */
function prn_to_sheet_str(str, opts) {
	var looks_delimited = str.substr(0,4) == "sep=" || str.indexOf("\t") != -1 || str.indexOf(",") != -1;
	if(!looks_delimited) return aoa_to_sheet(prn_to_aoa_str(str, opts), opts);
	return dsv_to_sheet_str(str, opts);
}
/* Convert the raw input to a string according to opts.type, then pass it to prn_to_sheet_str.
 * Throws an Error when opts.type is not one of base64 / binary / buffer / array. */
function prn_to_sheet(d, opts) {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64.decode(d);
	else if(kind === 'binary') text = d;
	else if(kind === 'buffer') text = d.toString('binary');
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return prn_to_sheet_str(text, opts);
}
/* Parse the input into a single worksheet with prn_to_sheet and wrap it via sheet_to_workbook. */
function prn_to_workbook(str, opts) {
	var sheet = prn_to_sheet(str, opts);
	return sheet_to_workbook(sheet, opts);
}
@ -14999,6 +15036,15 @@ var zip, d = data;
return parse_zip(zip, o);
}
/*
 * Read text data that starts with a two-byte byte-order mark (readSync routes
 * FF FE input here).
 *
 * data: raw input; base64 input is decoded first when o.type is 'base64'.
 * o:    parse options. The object is NOT modified: the parsers receive a
 *       shallow copy whose type is "binary", so a caller that reuses its
 *       options object for another read keeps the type it set.
 *
 * The first two code units (the BOM) are skipped and the rest is decoded as
 * codepage 1200. Decoded text starting with "<" goes to parse_xlml; anything
 * else goes to PRN.to_workbook.
 */
function read_utf16(data, o) {
	var d = data;
	if(o.type == 'base64') d = Base64.decode(d);
	d = cptable.utils.decode(1200, d.slice(2));
	/* copy instead of assigning o.type, which would leak "binary" back to the caller */
	var oo = ({});
	for(var k in o) if(Object.prototype.hasOwnProperty.call(o, k)) oo[k] = o[k];
	oo.type = "binary";
	if(d.charCodeAt(0) == 0x3C) return parse_xlml(d, oo);
	return PRN.to_workbook(d, oo);
}
function readSync(data, opts) {
var zip, d = data, n=[0];
var o = opts||{};
@ -15012,11 +15058,11 @@ function readSync(data, opts) {
case 0x54: if(n[1] == 0x41 && n[2] == 0x42 && n[3] == 0x4C) return DIF.to_workbook(d, o); break;
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
case 0xEF: return parse_xlml(d, o);
case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
/* TODO: CSV / TXT */
return PRN.to_workbook(d, o);
}