From 5c4bf62b83808ab94bf5a58f157b2f4857da3df5 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Fri, 31 Jan 2014 06:52:18 -0500 Subject: [PATCH] version bump 0.4.3: shared strings and formatting - XLSB basic support for number formats - removed extraneous Strings table element - formatting xml tags (rich_stress_test) - updating test_files to 20130130 --- bits/31_version.js | 2 +- bits/38_recordhopper.js | 3 +- bits/39_parsestructs.js | 2 +- bits/52_sstxml.js | 12 +++-- bits/57_styxml.js | 3 +- bits/58_stybin.js | 67 +++++++++++++++++++++++++- bits/73_wsbin.js | 7 ++- bits/82_parsetab.js | 4 +- bits/90_utils.js | 5 +- package.json | 2 +- tests.lst | 10 ++++ tests/files | 2 +- xlsx.js | 103 +++++++++++++++++++++++++++++++++------- 13 files changed, 187 insertions(+), 35 deletions(-) diff --git a/bits/31_version.js b/bits/31_version.js index cdc4408..78e331d 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -XLSX.version = '0.4.2'; +XLSX.version = '0.4.3'; diff --git a/bits/38_recordhopper.js b/bits/38_recordhopper.js index 4f12dc1..81be9e6 100644 --- a/bits/38_recordhopper.js +++ b/bits/38_recordhopper.js @@ -6,10 +6,9 @@ var recordhopper = function(data, cb) { var RT = data.read_shift(1); if(RT & 0x80) RT = (RT & 0x7F) + ((data.read_shift(1) & 0x7F)<<7); var R = RecordEnum[RT] || RecordEnum[0xFFFF]; - length = tmpbyte = data.read_shift(1); for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte); var d = R.f(data, length); - if(cb(d, R)) return; + if(cb(d, R, RT)) return; } }; diff --git a/bits/39_parsestructs.js b/bits/39_parsestructs.js index 3ef7a41..bfbf921 100644 --- a/bits/39_parsestructs.js +++ b/bits/39_parsestructs.js @@ -26,7 +26,7 @@ function parse_Cell(data) { var iStyleRef = data.read_shift(2); iStyleRef += data.read_shift(1) <<16; var fPhShow = data.read_shift(1); - return { c:col }; + return { c:col, iStyleRef: iStyleRef }; } /* [MS-XLSB] 2.5.21 */ diff --git 
a/bits/52_sstxml.js b/bits/52_sstxml.js index 65bf4ad..4f68ec4 100644 --- a/bits/52_sstxml.js +++ b/bits/52_sstxml.js @@ -15,13 +15,17 @@ var parse_rs = (function() { case '': break; /* 18.4.1 charset CT_IntProperty TODO */ case '': break; /* 18.4.5 rFont CT_FontName */ case ']*)>([\\s\\S]*)<\/sst>","m")); if(isval(sst)) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; }); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } return s; diff --git a/bits/57_styxml.js b/bits/57_styxml.js index 7d2a2e3..8f057e4 100644 --- a/bits/57_styxml.js +++ b/bits/57_styxml.js @@ -24,7 +24,8 @@ function parseCXfs(t) { case '': case '': break; /* 18.8.45 xf CT_Xf */ - case '': break; diff --git a/bits/58_stybin.js b/bits/58_stybin.js index b1076fc..c4c192f 100644 --- a/bits/58_stybin.js +++ b/bits/58_stybin.js @@ -1,3 +1,66 @@ -function parse_sty_bin(data) { - +function parse_BrtFmt(data, length) { + var ifmt = data.read_shift(2); + var stFmtCode = parse_XLWideString(data,length-2); + return [ifmt, stFmtCode]; +} + +function parse_BrtXF(data, length) { + var ixfeParent = data.read_shift(2); + var ifmt = data.read_shift(2); + parsenoop(data, length-4); + return {ixfe:ixfeParent, ifmt:ifmt }; +} + +function parse_sty_bin(data) { + styles.NumberFmt = []; + for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y]; + + styles.CellXf = []; + var state = ""; + var pass = false; + recordhopper(data, function(val, R, RT) { + switch(R.n) { + case 'BrtFmt': + styles.NumberFmt[val[0]] = val[1]; SSF.load(val[1], val[0]); + break; + case 'BrtFont': break; /* TODO */ + case 'BrtKnownFonts': break; /* TODO */ + case 'BrtFill': break; /* TODO */ + case 'BrtBorder': break; /* TODO */ + case 'BrtXF': + if(state === "CELLXFS") { + styles.CellXf.push(val); + } + break; /* TODO */ + case 'BrtStyle': break; /* TODO */ + case 'BrtRowHdr': break; /* TODO */ + case 
'BrtCellMeta': break; /* ?? */ + case 'BrtBeginStyleSheet': break; + case 'BrtEndStyleSheet': break; + case 'BrtBeginFmts': state = "FMTS"; break; + case 'BrtEndFmts': state = ""; break; + case 'BrtBeginFonts': state = "FONTS"; break; + case 'BrtEndFonts': state = ""; break; + case 'BrtACBegin': state = "ACFONTS"; break; + case 'BrtACEnd': state = ""; break; + case 'BrtBeginFills': state = "FILLS"; break; + case 'BrtEndFills': state = ""; break; + case 'BrtBeginBorders': state = "BORDERS"; break; + case 'BrtEndBorders': state = ""; break; + case 'BrtBeginCellStyleXFs': state = "CELLSTYLEXFS"; break; + case 'BrtEndCellStyleXFs': state = ""; break; + case 'BrtBeginCellXFs': state = "CELLXFS"; break; + case 'BrtEndCellXFs': state = ""; break; + case 'BrtBeginStyles': state = "STYLES"; break; + case 'BrtEndStyles': state = ""; break; + case 'BrtBeginDXFs': state = "DXFS"; break; + case 'BrtEndDXFs': state = ""; break; + case 'BrtBeginTableStyles': state = "TABLESTYLES"; break; + case 'BrtEndTableStyles': state = ""; break; + case 'BrtFRTBegin': pass = true; break; + case 'BrtFRTEnd': pass = false; break; + //default: if(!pass) throw new Error("Unexpected record " + RT + " " + R.n); + } + }); + return styles; } diff --git a/bits/73_wsbin.js b/bits/73_wsbin.js index 9fa3769..f0e7194 100644 --- a/bits/73_wsbin.js +++ b/bits/73_wsbin.js @@ -115,6 +115,9 @@ var parse_ws_bin = function(data) { case 'str': if(p.v) p.v = utf8read(p.v); break; } if(val[3]) p.f = val[3]; + if(styles.CellXf[val[0].iStyleRef]) try { + p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts); + } catch(e) { } s[encode_cell({c:val[0].c,r:row.r})] = p; break; // TODO @@ -144,8 +147,8 @@ var parse_ws_bin = function(data) { case 'BrtPrintOptions': break; // TODO case 'BrtMargins': break; // TODO case 'BrtPageSetup': break; // TODO - case 'BrtFRTBegin': pass = true; break; // TODO - case 'BrtFRTEnd': pass = false; break; // TODO + case 'BrtFRTBegin': pass = true; break; + case 'BrtFRTEnd': 
pass = false; break; case 'BrtEndSheet': break; // TODO //default: if(!pass) throw new Error("Unexpected record " + R.n); } diff --git a/bits/82_parsetab.js b/bits/82_parsetab.js index 044951a..d077adf 100644 --- a/bits/82_parsetab.js +++ b/bits/82_parsetab.js @@ -36,10 +36,10 @@ var RecordEnum = { 0x0028: { n:"BrtIndexRowBlock", f:parsenoop }, 0x002A: { n:"BrtIndexBlock", f:parsenoop }, 0x002B: { n:"BrtFont", f:parsenoop }, - 0x002C: { n:"BrtFmt", f:parsenoop }, + 0x002C: { n:"BrtFmt", f:parse_BrtFmt }, 0x002D: { n:"BrtFill", f:parsenoop }, 0x002E: { n:"BrtBorder", f:parsenoop }, - 0x002F: { n:"BrtXF", f:parsenoop }, + 0x002F: { n:"BrtXF", f:parse_BrtXF }, 0x0030: { n:"BrtStyle", f:parsenoop }, 0x0031: { n:"BrtCellMeta", f:parsenoop }, 0x0032: { n:"BrtValueMeta", f:parsenoop }, diff --git a/bits/90_utils.js b/bits/90_utils.js index 1903493..60f3454 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -33,8 +33,8 @@ function sheet_to_row_object_array(sheet, opts){ for (C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c: C,r: R})]; if(!val || !val.t) continue; - v = (val || {}).v; - switch(val.t){ + if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; } + else switch(val.t){ case 's': case 'str': case 'b': case 'n': if(val.v !== undefined) { row[hdr[C]] = val.v; @@ -53,6 +53,7 @@ function sheet_to_row_object_array(sheet, opts){ function sheet_to_csv(sheet, opts) { var stringify = function stringify(val) { if(!val.t) return ""; + if(typeof val.w !== 'undefined') return '"' + val.w.replace(/"/g,'""') + '"'; switch(val.t){ case 'n': return String(val.v); case 's': case 'str': diff --git a/package.json b/package.json index 3d90c72..ce5c6f3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.4.2", + "version": "0.4.3", "author": "sheetjs", "description": "XLSB / XLSX / XLSM parser", "keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ], diff --git a/tests.lst b/tests.lst index 
d6d633e..090af1f 100644 --- a/tests.lst +++ b/tests.lst @@ -1,4 +1,6 @@ formula_stress_test.xlsb +number_format.xlsb +rich_text_stress.xlsb time_stress_test_1.xlsb LONumbers-2010.xlsx LONumbers-2011.xlsx @@ -41,6 +43,7 @@ apachepoi_50846-border_colours.xlsx apachepoi_50867_with_table.xlsx apachepoi_51222.xlsx apachepoi_51470.xlsx +apachepoi_51585.xlsx apachepoi_51710.xlsx apachepoi_51850.xlsx apachepoi_51963.xlsx @@ -122,6 +125,7 @@ apachepoi_workbookProtection_workbook_structure_protected.xlsx apachepoi_workbookProtection_workbook_windows_protected.xlsx apachepoi_workbookProtection_worksheet_protected.xlsx apachepoi_xlsx-jdbc.xlsx +comments_stress_test.xlsx excel-reader-xlsx_data01.xlsx excel-reader-xlsx_data02.xlsx excel-reader-xlsx_error02.xlsx.pending @@ -163,6 +167,7 @@ openpyxl_r_date_1904.xlsx openpyxl_r_formulae.xlsx openpyxl_r_null_archive.xlsx.pending openpyxl_r_null_file.xlsx.pending +rich_text_stress.xlsx roo_1900_base.xlsx roo_1904_base.xlsx roo_Bibelbund.xlsx @@ -195,6 +200,7 @@ spreadsheet-parsexlsx_bug-11.xlsx spreadsheet-parsexlsx_bug-12.xlsx spreadsheet-parsexlsx_bug-13.xlsx spreadsheet-parsexlsx_bug-14.xlsx +spreadsheet-parsexlsx_bug-15.xlsx spreadsheet-parsexlsx_bug-2.xlsx spreadsheet-parsexlsx_bug-3.xlsx spreadsheet-parsexlsx_bug-4.xlsx @@ -203,11 +209,15 @@ spreadsheet-parsexlsx_bug-6-2.xlsx spreadsheet-parsexlsx_bug-6.xlsx spreadsheet-parsexlsx_bug-7.xlsx spreadsheet-parsexlsx_bug-8.xlsx +xlrd_merged_cells.xlsx xlrd_reveng1.xlsx +xlrd_test_comments_excel.xlsx +xlrd_test_comments_gdocs.xlsx xlrd_text_bar.xlsx חישוב_נקודות_זיכוי.xlsx apachepoi_45431.xlsm apachepoi_47026.xlsm apachepoi_47089.xlsm apachepoi_ExcelWithAttachments.xlsm +number_format.xlsm openpyxl_r_vba-test.xlsm diff --git a/tests/files b/tests/files index 51eb476..a9fc7e0 160000 --- a/tests/files +++ b/tests/files @@ -1 +1 @@ -Subproject commit 51eb4765cd8828155a72d5b2aa273e999daaa93e +Subproject commit a9fc7e00949b0f7621f0b51e0ed5b139bac8e109 diff --git a/xlsx.js b/xlsx.js index 
a0eb8a6..fd7ca39 100644 --- a/xlsx.js +++ b/xlsx.js @@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa make_ssf(SSF); var XLSX = {}; (function(XLSX){ -XLSX.version = '0.4.2'; +XLSX.version = '0.4.3'; var current_codepage, current_cptable, cptable; if(typeof module !== "undefined" && typeof require !== 'undefined') { if(typeof cptable === 'undefined') cptable = require('codepage'); @@ -705,11 +705,10 @@ var recordhopper = function(data, cb) { var RT = data.read_shift(1); if(RT & 0x80) RT = (RT & 0x7F) + ((data.read_shift(1) & 0x7F)<<7); var R = RecordEnum[RT] || RecordEnum[0xFFFF]; - length = tmpbyte = data.read_shift(1); for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte); var d = R.f(data, length); - if(cb(d, R)) return; + if(cb(d, R, RT)) return; } }; /* [MS-XLSB] 2.1.7.121 */ @@ -740,7 +739,7 @@ function parse_Cell(data) { var iStyleRef = data.read_shift(2); iStyleRef += data.read_shift(1) <<16; var fPhShow = data.read_shift(1); - return { c:col }; + return { c:col, iStyleRef: iStyleRef }; } /* [MS-XLSB] 2.5.21 */ @@ -815,13 +814,17 @@ var parse_rs = (function() { case '': break; /* 18.4.1 charset CT_IntProperty TODO */ case '': break; /* 18.4.5 rFont CT_FontName */ case ']*)>([\\s\\S]*)<\/sst>","m")); if(isval(sst)) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; }); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } return s; @@ -985,7 +988,8 @@ function parseCXfs(t) { case '': case '': break; /* 18.8.45 xf CT_Xf */ - case '': break; @@ -1025,8 +1029,71 @@ function parse_styles(data) { return styles; } -function parse_sty_bin(data) { +function parse_BrtFmt(data, length) { + var ifmt = data.read_shift(2); + var stFmtCode = parse_XLWideString(data,length-2); + return [ifmt, stFmtCode]; +} +function 
parse_BrtXF(data, length) { + var ixfeParent = data.read_shift(2); + var ifmt = data.read_shift(2); + parsenoop(data, length-4); + return {ixfe:ixfeParent, ifmt:ifmt }; +} + +function parse_sty_bin(data) { + styles.NumberFmt = []; + for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y]; + + styles.CellXf = []; + var state = ""; + var pass = false; + recordhopper(data, function(val, R, RT) { + switch(R.n) { + case 'BrtFmt': + styles.NumberFmt[val[0]] = val[1]; SSF.load(val[1], val[0]); + break; + case 'BrtFont': break; /* TODO */ + case 'BrtKnownFonts': break; /* TODO */ + case 'BrtFill': break; /* TODO */ + case 'BrtBorder': break; /* TODO */ + case 'BrtXF': + if(state === "CELLXFS") { + styles.CellXf.push(val); + } + break; /* TODO */ + case 'BrtStyle': break; /* TODO */ + case 'BrtRowHdr': break; /* TODO */ + case 'BrtCellMeta': break; /* ?? */ + case 'BrtBeginStyleSheet': break; + case 'BrtEndStyleSheet': break; + case 'BrtBeginFmts': state = "FMTS"; break; + case 'BrtEndFmts': state = ""; break; + case 'BrtBeginFonts': state = "FONTS"; break; + case 'BrtEndFonts': state = ""; break; + case 'BrtACBegin': state = "ACFONTS"; break; + case 'BrtACEnd': state = ""; break; + case 'BrtBeginFills': state = "FILLS"; break; + case 'BrtEndFills': state = ""; break; + case 'BrtBeginBorders': state = "BORDERS"; break; + case 'BrtEndBorders': state = ""; break; + case 'BrtBeginCellStyleXFs': state = "CELLSTYLEXFS"; break; + case 'BrtEndCellStyleXFs': state = ""; break; + case 'BrtBeginCellXFs': state = "CELLXFS"; break; + case 'BrtEndCellXFs': state = ""; break; + case 'BrtBeginStyles': state = "STYLES"; break; + case 'BrtEndStyles': state = ""; break; + case 'BrtBeginDXFs': state = "DXFS"; break; + case 'BrtEndDXFs': state = ""; break; + case 'BrtBeginTableStyles': state = "TABLESTYLES"; break; + case 'BrtEndTableStyles': state = ""; break; + case 'BrtFRTBegin': pass = true; break; + case 'BrtFRTEnd': pass = false; break; + //default: if(!pass) throw new 
Error("Unexpected record " + RT + " " + R.n); + } + }); + return styles; } var ct2type = { @@ -1449,6 +1516,9 @@ var parse_ws_bin = function(data) { case 'str': if(p.v) p.v = utf8read(p.v); break; } if(val[3]) p.f = val[3]; + if(styles.CellXf[val[0].iStyleRef]) try { + p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts); + } catch(e) { } s[encode_cell({c:val[0].c,r:row.r})] = p; break; // TODO @@ -1478,8 +1548,8 @@ var parse_ws_bin = function(data) { case 'BrtPrintOptions': break; // TODO case 'BrtMargins': break; // TODO case 'BrtPageSetup': break; // TODO - case 'BrtFRTBegin': pass = true; break; // TODO - case 'BrtFRTEnd': pass = false; break; // TODO + case 'BrtFRTBegin': pass = true; break; + case 'BrtFRTEnd': pass = false; break; case 'BrtEndSheet': break; // TODO //default: if(!pass) throw new Error("Unexpected record " + R.n); } @@ -1786,10 +1856,10 @@ var RecordEnum = { 0x0028: { n:"BrtIndexRowBlock", f:parsenoop }, 0x002A: { n:"BrtIndexBlock", f:parsenoop }, 0x002B: { n:"BrtFont", f:parsenoop }, - 0x002C: { n:"BrtFmt", f:parsenoop }, + 0x002C: { n:"BrtFmt", f:parse_BrtFmt }, 0x002D: { n:"BrtFill", f:parsenoop }, 0x002E: { n:"BrtBorder", f:parsenoop }, - 0x002F: { n:"BrtXF", f:parsenoop }, + 0x002F: { n:"BrtXF", f:parse_BrtXF }, 0x0030: { n:"BrtStyle", f:parsenoop }, 0x0031: { n:"BrtCellMeta", f:parsenoop }, 0x0032: { n:"BrtValueMeta", f:parsenoop }, @@ -2698,8 +2768,8 @@ function sheet_to_row_object_array(sheet, opts){ for (C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c: C,r: R})]; if(!val || !val.t) continue; - v = (val || {}).v; - switch(val.t){ + if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; } + else switch(val.t){ case 's': case 'str': case 'b': case 'n': if(val.v !== undefined) { row[hdr[C]] = val.v; @@ -2718,6 +2788,7 @@ function sheet_to_row_object_array(sheet, opts){ function sheet_to_csv(sheet, opts) { var stringify = function stringify(val) { if(!val.t) return ""; + if(typeof val.w !== 
'undefined') return '"' + val.w.replace(/"/g,'""') + '"'; switch(val.t){ case 'n': return String(val.v); case 's': case 'str':