From 5e9bca78f2b0c54242cefc8a358f3151232941ab Mon Sep 17 00:00:00 2001 From: eladxxx Date: Thu, 29 May 2014 09:18:23 +0300 Subject: [PATCH] Initial implementation of style/theme parsing. Huge thanks to @SheetJS for guidance on how to do all of this! Experimental, guarded by the cellStyles option (default: false) --- README.md | 2 + bits/56_stycommon.js | 2 + bits/57_styxml.js | 51 ++++++++- bits/59_theme.js | 182 ++++++++++++++++++++++++++++++ bits/72_wsxml.js | 12 +- bits/79_xmlbin.js | 4 + bits/84_defaults.js | 1 + bits/85_parsezip.js | 4 + xlsx.js | 256 ++++++++++++++++++++++++++++++++++++++++++- 9 files changed, 510 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7625477..73227f8 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,7 @@ that does not start with `!` corresponds to a cell (using `A-1` notation). - `.c` : comments associated with the cell - `.z` : the number format string associated with the cell (if requested) - `.l` : the hyperlink of the cell (.Target holds link, .tooltip is tooltip) +- `.s` : the style/theme of the cell (if applicable) For dates, `.v` holds the raw date code from the sheet and `.w` holds the text @@ -222,6 +223,7 @@ The exported `read` and `readFile` functions accept an options argument: | cellFormula | true | Save formulae to the .f field | | cellHTML | true | Parse rich text and save HTML to the .h field | | cellNF | false | Save number format string to the .z field | +| cellStyles | false | Save style/theme info to the .s field | | sheetStubs | false | Create cell objects for stub cells | | sheetRows | 0 | If >0, read the first `sheetRows` rows ** | | bookDeps | false | If true, parse calculation chains | diff --git a/bits/56_stycommon.js b/bits/56_stycommon.js index e9863c8..861fe79 100644 --- a/bits/56_stycommon.js +++ b/bits/56_stycommon.js @@ -1,2 +1,4 @@ var styles = {}; // shared styles +var themes = {}; // shared themes + diff --git a/bits/57_styxml.js b/bits/57_styxml.js index 1cbb3d5..7391dbf 100644 --- a/bits/57_styxml.js +++ b/bits/57_styxml.js @@ -1,3 +1,48 @@ +/* 18.8.21 fills CT_Fills */ +function parse_fills(t, opts) { + styles.Fills = []; + var fill = {}; + t[0].match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + case '': case '': break; + + /* 18.8.20 fill CT_Fill */ + case '': break; + case '': styles.Fills.push(fill); fill = {}; break; + + /* 18.8.32 patternFill CT_PatternFill */ + case '': break; + + /* 18.8.3 bgColor CT_Color */ + case '': break; + + /* 18.8.19 fgColor CT_Color */ + case '': break; + + default: if(opts.WTF) throw 'unrecognized ' + y[0] + ' in fills'; + } + }); +} + /* 18.8.31 numFmts CT_NumFmts */ function parse_numFmts(t, opts) { styles.NumberFmt = []; @@ -38,6 +83,7 @@ function parse_cellXfs(t, opts) { /* 18.8.45 xf CT_Xf */ case '': break; @@ -73,7 +119,10 @@ function parse_sty_xml(data, opts) { if((t=data.match(/]*)>.*<\/numFmts>/))) parse_numFmts(t, opts); /* fonts CT_Fonts ? */ - /* fills CT_Fills ? */ + + /* fills CT_Fills */ + if((t=data.match(/]*)>.*<\/fills>/))) parse_fills(t, opts); + /* borders CT_Borders ? */ /* cellStyleXfs CT_CellStyleXfs ? */ diff --git a/bits/59_theme.js b/bits/59_theme.js index 23255b6..93be245 100644 --- a/bits/59_theme.js +++ b/bits/59_theme.js @@ -1,3 +1,185 @@ RELS.THEME = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"; +/* Various RGB/HSL utility functions - might want to put these elsewhere. */ +/* From http://www.javascripter.net/faq/hextorgb.htm, usage: var X = hexToX('FFFFFF') */ +function cutHex(h) {return (h.charAt(0)=="#") ? h.substring(1,7):h} +function hexToR(h) {return parseInt((cutHex(h)).substring(0,2),16)} +function hexToG(h) {return parseInt((cutHex(h)).substring(2,4),16)} +function hexToB(h) {return parseInt((cutHex(h)).substring(4,6),16)} +/* From http://www.javascripter.net/faq/rgbtohex.htm, usage: var RGB = rgbToHex(R, G, B) */ +function toHex(n) { + n = parseInt(n,10); + if (isNaN(n)) return "00"; + n = Math.max(0,Math.min(n,255)); + return "0123456789ABCDEF".charAt((n-n%16)/16) + + "0123456789ABCDEF".charAt(n%16); +} +function rgbToHex(R,G,B) {return toHex(R)+toHex(G)+toHex(B)} +/* From the specification. */ +var HLSMAX = 255; +/* From https://gist.github.com/mjackson/5311256 via http://stackoverflow.com/a/9493060 */ +/** + * Converts an RGB color value to HSL. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_color_space. + * Assumes r, g, and b are contained in the set [0, 255] and + * returns h, s, and l in the set [0, 1]. + * + * @param Number r The red color value + * @param Number g The green color value + * @param Number b The blue color value + * @return Array The HSL representation + */ +function rgbToHsl(r, g, b){ + r /= 255, g /= 255, b /= 255; + var max = Math.max(r, g, b), min = Math.min(r, g, b); + var h, s, l = (max + min) / 2; + + if(max == min){ + h = s = 0; // achromatic + }else{ + var d = max - min; + s = l > 0.5 ? d / (2 - max - min) : d / (max + min); + switch(max){ + case r: h = (g - b) / d + (g < b ? 6 : 0); break; + case g: h = (b - r) / d + 2; break; + case b: h = (r - g) / d + 4; break; + } + h /= 6; + } + + return [h, s, l]; +} +/** + * Converts an HSL color value to RGB. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_color_space. + * Assumes h, s, and l are contained in the set [0, 1] and + * returns r, g, and b in the set [0, 255]. + * + * @param Number h The hue + * @param Number s The saturation + * @param Number l The lightness + * @return Array The RGB representation + */ +function hslToRgb(h, s, l){ + var r, g, b; + + if(s == 0){ + r = g = b = l; // achromatic + }else{ + function hue2rgb(p, q, t){ + if(t < 0) t += 1; + if(t > 1) t -= 1; + if(t < 1/6) return p + (q - p) * 6 * t; + if(t < 1/2) return q; + if(t < 2/3) return p + (q - p) * (2/3 - t) * 6; + return p; + } + + var q = l < 0.5 ? l * (1 + s) : l + s - l * s; + var p = 2 * l - q; + r = hue2rgb(p, q, h + 1/3); + g = hue2rgb(p, q, h); + b = hue2rgb(p, q, h - 1/3); + } + + return [Math.round(r * 255), Math.round(g * 255), Math.round(b * 255)]; +} +/* Utility function to apply tint to an RGB color. */ +function rgb_tint(rgb, tint) { + var r = hexToR(rgb), + g = hexToG(rgb), + b = hexToB(rgb), + hsl = rgbToHsl(r, g, b); + + /* Apply tint as described in pages 1757-1758 of the ECMA Office Open XML specification. */ + /* NOTE: This is totally messed up... see http://social.msdn.microsoft.com/Forums/en-US/e9d8c136-6d62-4098-9b1b-dac786149f43/excel-color-tint-algorithm-incorrect */ + if (tint < 0) { + hsl[2] = hsl[2] * (1.0 + tint); + } else if (tint > 0) { + hsl[2] = hsl[2] * (1.0 + tint); + + // XXX This doesn't work... + //hsl[2] = hsl[2] * (1.0 - tint) + (HLSMAX - HLSMAX * (1.0 - tint)); + } + + rgb = hslToRgb(hsl[0], hsl[1], hsl[2]); + + return rgbToHex(rgb[0], rgb[1], rgb[2]); +} + +function parse_clrScheme(t, opts) { + themes.themeElements.clrScheme = []; + var color = {}; + t[0].match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + case '': case '': break; + + /* 20.1.2.3.32 srgbClr CT_SRgbColor */ + case '': + case '': + /* 20.1.4.1.10 dk2 (Dark 2) */ + case '': + case '': + /* 20.1.4.1.22 lt1 (Light 1) */ + case '': + case '': + /* 20.1.4.1.23 lt2 (Light 2) */ + case '': + case '': + /* 20.1.4.1.1 accent1 (Accent 1) */ + case '': + case '': + /* 20.1.4.1.2 accent2 (Accent 2) */ + case '': + case '': + /* 20.1.4.1.3 accent3 (Accent 3) */ + case '': + case '': + /* 20.1.4.1.4 accent4 (Accent 4) */ + case '': + case '': + /* 20.1.4.1.5 accent5 (Accent 5) */ + case '': + case '': + /* 20.1.4.1.6 accent6 (Accent 6) */ + case '': + case '': + /* 20.1.4.1.19 hlink (Hyperlink) */ + case '': + case '': + /* 20.1.4.1.15 folHlink (Followed Hyperlink) */ + case '': + case '': + if (y[0][1] === '/') { + themes.themeElements.clrScheme.push(color); + color = {}; + } else { + color.name = y[0].substring(3, y[0].length - 1); + } + break; + + default: if(opts.WTF) throw 'unrecognized ' + y[0] + ' in clrScheme'; + } + }); +} + +/* 14.2.7 Theme Part */ +function parse_theme_xml(data, opts) { + themes.themeElements = {}; + + var t; + + /* clrScheme */ + if((t=data.match(/]*)>.*<\/a:clrScheme>/))) parse_clrScheme(t, opts); + + return themes; +} + function write_theme() { return '\n'; } diff --git a/bits/72_wsxml.js b/bits/72_wsxml.js index 93406ae..89b0b82 100644 --- a/bits/72_wsxml.js +++ b/bits/72_wsxml.js @@ -73,14 +73,24 @@ function parse_ws_xml(data, opts, rels) { } /* formatting */ - var fmtid = 0; + var fmtid = 0, fillid = 0; if(cell.s && styles.CellXf) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; + if(opts.cellStyles && cf && cf.fillId) fillid = cf.fillId; } try { p.w = SSF.format(fmtid,p.v,_ssfopts); if(opts.cellNF) p.z = SSF._table[fmtid]; + if(fillid) { + p.s = styles.Fills[fillid]; + if (p.s.fgColor && p.s.fgColor.theme) { + p.s.fgColor.rgb = rgb_tint(themes.themeElements.clrScheme[p.s.fgColor.theme].rgb, p.s.fgColor.tint || 0); + } + if (p.s.bgColor && p.s.bgColor.theme) { + p.s.bgColor.rgb = rgb_tint(themes.themeElements.clrScheme[p.s.bgColor.theme].rgb, p.s.bgColor.tint || 0); + } + } } catch(e) { if(opts.WTF) throw e; } s[cell.r] = p; }); diff --git a/bits/79_xmlbin.js b/bits/79_xmlbin.js index 37c8bd0..77d1e78 100644 --- a/bits/79_xmlbin.js +++ b/bits/79_xmlbin.js @@ -10,6 +10,10 @@ function parse_sty(data, name, opts) { return (name.substr(-4)===".bin" ? parse_sty_bin : parse_sty_xml)(data, opts); } +function parse_theme(data, name, opts) { + return parse_theme_xml(data, opts); +} + function parse_sst(data, name, opts) { return (name.substr(-4)===".bin" ? parse_sst_bin : parse_sst_xml)(data, opts); } diff --git a/bits/84_defaults.js b/bits/84_defaults.js index 1e48ac3..b407313 100644 --- a/bits/84_defaults.js +++ b/bits/84_defaults.js @@ -11,6 +11,7 @@ var fix_read_opts = fix_opts([ ['cellNF', false], /* emit cell number format string as .z */ ['cellHTML', true], /* emit html string as .h */ ['cellFormula', true], /* emit formulae as .f */ + ['cellStyles', false], /* emits style/theme as .s */ ['sheetStubs', false], /* emit empty cells */ ['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */ diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index a983d2a..da5a639 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -25,6 +25,9 @@ function parse_zip(zip, opts) { styles = {}; if(dir.style) styles = parse_sty(getzipdata(zip, dir.style.replace(/^\//,'')),dir.style, opts); + + themes = {}; + if(opts.cellStyles && dir.themes) themes = parse_theme(getzipdata(zip, dir.themes[0].replace(/^\//,'')),dir.themes[0], opts); } var wb = parse_wb(getzipdata(zip, dir.workbooks[0].replace(/^\//,'')), dir.workbooks[0], opts); @@ -105,6 +108,7 @@ function parse_zip(zip, opts) { SheetNames: props.SheetNames, Strings: strs, Styles: styles, + Themes: themes, SSF: SSF.get_table() }; if(opts.bookFiles) { diff --git a/xlsx.js b/xlsx.js index 07913f8..07b0fa2 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1729,6 +1729,53 @@ var parse_sst_bin = function(data, opts) { var write_sst_bin = function(sst, opts) { }; var styles = {}; // shared styles +var themes = {}; // shared themes + +/* 18.8.21 fills CT_Fills */ +function parse_fills(t, opts) { + styles.Fills = []; + var fill = {}; + t[0].match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + case '': case '': break; + + /* 18.8.20 fill CT_Fill */ + case '': break; + case '': styles.Fills.push(fill); fill = {}; break; + + /* 18.8.32 patternFill CT_PatternFill */ + case '': break; + + /* 18.8.3 bgColor CT_Color */ + case '': break; + + /* 18.8.19 fgColor CT_Color */ + case '': break; + + default: if(opts.WTF) throw 'unrecognized ' + y[0] + ' in fills'; + } + }); +} + /* 18.8.31 numFmts CT_NumFmts */ function parse_numFmts(t, opts) { styles.NumberFmt = []; @@ -1769,6 +1816,7 @@ function parse_cellXfs(t, opts) { /* 18.8.45 xf CT_Xf */ case '': break; @@ -1804,7 +1852,10 @@ function parse_sty_xml(data, opts) { if((t=data.match(/]*)>.*<\/numFmts>/))) parse_numFmts(t, opts); /* fonts CT_Fonts ? */ - /* fills CT_Fills ? */ + + /* fills CT_Fills */ + if((t=data.match(/]*)>.*<\/fills>/))) parse_fills(t, opts); + /* borders CT_Borders ? */ /* cellStyleXfs CT_CellStyleXfs ? */ @@ -1962,6 +2013,188 @@ function parse_sty_bin(data, opts) { } RELS.THEME = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"; +/* Various RGB/HSL utility functions - might want to put these elsewhere. */ +/* From http://www.javascripter.net/faq/hextorgb.htm, usage: var X = hexToX('FFFFFF') */ +function cutHex(h) {return (h.charAt(0)=="#") ? h.substring(1,7):h} +function hexToR(h) {return parseInt((cutHex(h)).substring(0,2),16)} +function hexToG(h) {return parseInt((cutHex(h)).substring(2,4),16)} +function hexToB(h) {return parseInt((cutHex(h)).substring(4,6),16)} +/* From http://www.javascripter.net/faq/rgbtohex.htm, usage: var RGB = rgbToHex(R, G, B) */ +function toHex(n) { + n = parseInt(n,10); + if (isNaN(n)) return "00"; + n = Math.max(0,Math.min(n,255)); + return "0123456789ABCDEF".charAt((n-n%16)/16) + + "0123456789ABCDEF".charAt(n%16); +} +function rgbToHex(R,G,B) {return toHex(R)+toHex(G)+toHex(B)} +/* From the specification. */ +var HLSMAX = 255; +/* From https://gist.github.com/mjackson/5311256 via http://stackoverflow.com/a/9493060 */ +/** + * Converts an RGB color value to HSL. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_color_space. + * Assumes r, g, and b are contained in the set [0, 255] and + * returns h, s, and l in the set [0, 1]. + * + * @param Number r The red color value + * @param Number g The green color value + * @param Number b The blue color value + * @return Array The HSL representation + */ +function rgbToHsl(r, g, b){ + r /= 255, g /= 255, b /= 255; + var max = Math.max(r, g, b), min = Math.min(r, g, b); + var h, s, l = (max + min) / 2; + + if(max == min){ + h = s = 0; // achromatic + }else{ + var d = max - min; + s = l > 0.5 ? d / (2 - max - min) : d / (max + min); + switch(max){ + case r: h = (g - b) / d + (g < b ? 6 : 0); break; + case g: h = (b - r) / d + 2; break; + case b: h = (r - g) / d + 4; break; + } + h /= 6; + } + + return [h, s, l]; +} +/** + * Converts an HSL color value to RGB. Conversion formula + * adapted from http://en.wikipedia.org/wiki/HSL_color_space. + * Assumes h, s, and l are contained in the set [0, 1] and + * returns r, g, and b in the set [0, 255]. + * + * @param Number h The hue + * @param Number s The saturation + * @param Number l The lightness + * @return Array The RGB representation + */ +function hslToRgb(h, s, l){ + var r, g, b; + + if(s == 0){ + r = g = b = l; // achromatic + }else{ + function hue2rgb(p, q, t){ + if(t < 0) t += 1; + if(t > 1) t -= 1; + if(t < 1/6) return p + (q - p) * 6 * t; + if(t < 1/2) return q; + if(t < 2/3) return p + (q - p) * (2/3 - t) * 6; + return p; + } + + var q = l < 0.5 ? l * (1 + s) : l + s - l * s; + var p = 2 * l - q; + r = hue2rgb(p, q, h + 1/3); + g = hue2rgb(p, q, h); + b = hue2rgb(p, q, h - 1/3); + } + + return [Math.round(r * 255), Math.round(g * 255), Math.round(b * 255)]; +} +/* Utility function to apply tint to an RGB color. */ +function rgb_tint(rgb, tint) { + var r = hexToR(rgb), + g = hexToG(rgb), + b = hexToB(rgb), + hsl = rgbToHsl(r, g, b); + + /* Apply tint as described in pages 1757-1758 of the ECMA Office Open XML specification. */ + /* NOTE: This is totally messed up... see http://social.msdn.microsoft.com/Forums/en-US/e9d8c136-6d62-4098-9b1b-dac786149f43/excel-color-tint-algorithm-incorrect */ + if (tint < 0) { + hsl[2] = hsl[2] * (1.0 + tint); + } else if (tint > 0) { + hsl[2] = hsl[2] * (1.0 + tint); + + // XXX This doesn't work... + //hsl[2] = hsl[2] * (1.0 - tint) + (HLSMAX - HLSMAX * (1.0 - tint)); + } + + rgb = hslToRgb(hsl[0], hsl[1], hsl[2]); + + return rgbToHex(rgb[0], rgb[1], rgb[2]); +} + +function parse_clrScheme(t, opts) { + themes.themeElements.clrScheme = []; + var color = {}; + t[0].match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + case '': case '': break; + + /* 20.1.2.3.32 srgbClr CT_SRgbColor */ + case '': + case '': + /* 20.1.4.1.10 dk2 (Dark 2) */ + case '': + case '': + /* 20.1.4.1.22 lt1 (Light 1) */ + case '': + case '': + /* 20.1.4.1.23 lt2 (Light 2) */ + case '': + case '': + /* 20.1.4.1.1 accent1 (Accent 1) */ + case '': + case '': + /* 20.1.4.1.2 accent2 (Accent 2) */ + case '': + case '': + /* 20.1.4.1.3 accent3 (Accent 3) */ + case '': + case '': + /* 20.1.4.1.4 accent4 (Accent 4) */ + case '': + case '': + /* 20.1.4.1.5 accent5 (Accent 5) */ + case '': + case '': + /* 20.1.4.1.6 accent6 (Accent 6) */ + case '': + case '': + /* 20.1.4.1.19 hlink (Hyperlink) */ + case '': + case '': + /* 20.1.4.1.15 folHlink (Followed Hyperlink) */ + case '': + case '': + if (y[0][1] === '/') { + themes.themeElements.clrScheme.push(color); + color = {}; + } else { + color.name = y[0].substring(3, y[0].length - 1); + } + break; + + default: if(opts.WTF) throw 'unrecognized ' + y[0] + ' in clrScheme'; + } + }); +} + +/* 14.2.7 Theme Part */ +function parse_theme_xml(data, opts) { + themes.themeElements = {}; + + var t; + + /* clrScheme */ + if((t=data.match(/]*)>.*<\/a:clrScheme>/))) parse_clrScheme(t, opts); + + return themes; +} + function write_theme() { return '\n'; } /* 18.6 Calculation Chain */ function parse_cc_xml(data, opts) { @@ -2222,14 +2455,24 @@ function parse_ws_xml(data, opts, rels) { } /* formatting */ - var fmtid = 0; + var fmtid = 0, fillid = 0; if(cell.s && styles.CellXf) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; + if(opts.cellStyles && cf && cf.fillId) fillid = cf.fillId; } try { p.w = SSF.format(fmtid,p.v,_ssfopts); if(opts.cellNF) p.z = SSF._table[fmtid]; + if(fillid) { + p.s = styles.Fills[fillid]; + if (p.s.fgColor && p.s.fgColor.theme) { + p.s.fgColor.rgb = rgb_tint(themes.themeElements.clrScheme[p.s.fgColor.theme].rgb, p.s.fgColor.tint || 0); + } + if (p.s.bgColor && p.s.bgColor.theme) { + p.s.bgColor.rgb = rgb_tint(themes.themeElements.clrScheme[p.s.bgColor.theme].rgb, p.s.bgColor.tint || 0); + } + } } catch(e) { if(opts.WTF) throw e; } s[cell.r] = p; }); @@ -2970,6 +3213,10 @@ function parse_sty(data, name, opts) { return (name.substr(-4)===".bin" ? parse_sty_bin : parse_sty_xml)(data, opts); } +function parse_theme(data, name, opts) { + return parse_theme_xml(data, opts); +} + function parse_sst(data, name, opts) { return (name.substr(-4)===".bin" ? parse_sst_bin : parse_sst_xml)(data, opts); } @@ -3842,6 +4089,7 @@ var fix_read_opts = fix_opts([ ['cellNF', false], /* emit cell number format string as .z */ ['cellHTML', true], /* emit html string as .h */ ['cellFormula', true], /* emit formulae as .f */ + ['cellStyles', false], /* emits style/theme as .s */ ['sheetStubs', false], /* emit empty cells */ ['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */ @@ -3890,6 +4138,9 @@ function parse_zip(zip, opts) { styles = {}; if(dir.style) styles = parse_sty(getzipdata(zip, dir.style.replace(/^\//,'')),dir.style, opts); + + themes = {}; + if(opts.cellStyles && dir.themes) themes = parse_theme(getzipdata(zip, dir.themes[0].replace(/^\//,'')),dir.themes[0], opts); } var wb = parse_wb(getzipdata(zip, dir.workbooks[0].replace(/^\//,'')), dir.workbooks[0], opts); @@ -3970,6 +4221,7 @@ function parse_zip(zip, opts) { SheetNames: props.SheetNames, Strings: strs, Styles: styles, + Themes: themes, SSF: SSF.get_table() }; if(opts.bookFiles) {