From d0e9ff9b3dacf7dd105748104d7b4748537e1a76 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Thu, 19 Jun 2014 23:53:37 -0400 Subject: [PATCH] version bump 0.2.0: safari performance On Safari, bit twiddling is 50% faster than directly evaluating mod 65521 --- README.md | 11 +++++++++++ adler32.js | 26 +++++++++++++------------- bits/40_adler.js | 26 +++++++++++++------------- ctest/adler32.js | 26 +++++++++++++------------- package.json | 2 +- perf/bstr.js | 48 +++++++++++++++++++++++++++++++++++++----------- 6 files changed, 88 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 57309c8..5c61f2a 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,17 @@ decisions in the code). [js-crc](http://git.io/crc32) has more performance notes +Bit twiddling is much faster than taking the mod on Safari and older Firefoxes. +Instead of taking the literal mod 65521, it is faster to keep it in the integers +by bit-shifting: `65536 ~ 15 mod 65521` so for nonnegative integer `a`: + +``` + a = (a >>> 16) * 65536 + (a & 65535) [equality] + a ~ (a >>> 16) * 15 + (a & 65535) mod 65521 +``` + +The mod is taken at the very end, since the intermediate result may exceed 65521 + ## Magic Number The magic numbers were chosen so as to not overflow a 31-bit integer: diff --git a/adler32.js b/adler32.js index 5eeec9a..0993821 100644 --- a/adler32.js +++ b/adler32.js @@ -10,29 +10,29 @@ function adler32_bstr(bstr) { if(bstr.length > 32768) if(use_buffer) return adler32_buf(Buffer(bstr)); var a = 1, b = 0, L = bstr.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += bstr.charCodeAt(i++); + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += bstr.charCodeAt(i); b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? 
(((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } function adler32_buf(buf) { var a = 1, b = 0, L = buf.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += buf[i++]; + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += buf[i]; b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } /* much much faster to intertwine utf8 and adler */ @@ -61,7 +61,7 @@ function adler32_str(str) { a %= 65521; b %= 65521; } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return (b << 16) | a; } ADLER32.bstr = adler32_bstr; ADLER32.buf = adler32_buf; diff --git a/bits/40_adler.js b/bits/40_adler.js index ea3a60b..dac4cc8 100644 --- a/bits/40_adler.js +++ b/bits/40_adler.js @@ -5,29 +5,29 @@ function adler32_bstr(bstr) { if(bstr.length > 32768) if(use_buffer) return adler32_buf(Buffer(bstr)); var a = 1, b = 0, L = bstr.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += bstr.charCodeAt(i++); + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += bstr.charCodeAt(i); b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } function adler32_buf(buf) { var a = 1, b = 0, L = buf.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += buf[i++]; + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += buf[i]; b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } /* much much faster to intertwine utf8 and adler */ @@ -56,5 +56,5 @@ function adler32_str(str) { a %= 65521; b %= 65521; } - return b > 32767 ? 
(((b - 65536) * 65536) | a) : ((b * 65536) | a); + return (b << 16) | a; } diff --git a/ctest/adler32.js b/ctest/adler32.js index 5eeec9a..0993821 100644 --- a/ctest/adler32.js +++ b/ctest/adler32.js @@ -10,29 +10,29 @@ function adler32_bstr(bstr) { if(bstr.length > 32768) if(use_buffer) return adler32_buf(Buffer(bstr)); var a = 1, b = 0, L = bstr.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += bstr.charCodeAt(i++); + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += bstr.charCodeAt(i); b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } function adler32_buf(buf) { var a = 1, b = 0, L = buf.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); - for(;M>0;--M) { - a += buf[i++]; + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += buf[i]; b += a; } - a %= 65521; - b %= 65521; + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) } - return b > 32767 ? (((b - 65536) * 65536) | a) : ((b * 65536) | a); + return ((b%65521) << 16) | (a%65521); } /* much much faster to intertwine utf8 and adler */ @@ -61,7 +61,7 @@ function adler32_str(str) { a %= 65521; b %= 65521; } - return b > 32767 ? 
(((b - 65536) * 65536) | a) : ((b * 65536) | a); + return (b << 16) | a; } ADLER32.bstr = adler32_bstr; ADLER32.buf = adler32_buf; diff --git a/package.json b/package.json index bea5a8c..7309772 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "adler-32", - "version": "0.1.0", + "version": "0.2.0", "author": "sheetjs", "description": "Pure-JS ADLER-32", "keywords": [ "adler32", "checksum" ], diff --git a/perf/bstr.js b/perf/bstr.js index 3afada7..1850cf3 100644 --- a/perf/bstr.js +++ b/perf/bstr.js @@ -1,5 +1,3 @@ -var table = require('../').table; - function sheetjs1(bstr) { var a = 1, b = 0, L = bstr.length; for(var i = 0; i < L;) { @@ -14,7 +12,21 @@ function sheetjs1(bstr) { function sheetjs2(bstr) { var a = 1, b = 0, L = bstr.length, M; for(var i = 0; i < L;) { - M = Math.min(L-i, 3854); + M = Math.min(L-i, 3850)+i; + for(;i<M;i++) { + a += bstr.charCodeAt(i); + b += a; + } + a = (15*(a>>>16)+(a&65535)) + b = (15*(b>>>16)+(b&65535)) + } + return ((b%65521) << 16) | (a%65521); +} + +function sheetjs3(bstr) { + var a = 1, b = 0, L = bstr.length, M; + for(var i = 0; i < L;) { + M = Math.min(L-i, 5552); for(;M>0;--M) { a += bstr.charCodeAt(i++); b += a; @@ -25,14 +37,28 @@ function sheetjs2(bstr) { return (b << 16) | a; } -var foobar = "foobarbazqux"; -for(var i = 0; i != 11; ++i) foobar += " " + foobar; +var foobar = [255,255,255,255,255,255].map(function(x) { return String.fromCharCode(x); }).join(""); +foobar += foobar; +foobar += foobar; +foobar += foobar; +foobar += foobar; +foobar += foobar; +foobar += foobar; +foobar.charCodeAt(0); +var m = 2048; var assert = require('assert'); -assert.equal(sheetjs1(foobar), sheetjs2(foobar)); - var BM = require('./bm'); -var suite = new BM('binary string'); +for(var i = 0; i != 14; ++i) { + foobar += foobar; + foobar.charCodeAt(0); + assert.equal(sheetjs1(foobar), sheetjs3(foobar)); + assert.equal(sheetjs1(foobar), sheetjs2(foobar)); + //for(var j = 0; j != 200; ++j) assert.equal(sheetjs2(foobar), sheetjs3(foobar)); + var suite = new BM('binary string (' + 
foobar.length + ')'); -suite.add('sheetjs 1', function() { for(var j = 0; j != 1000; ++j) sheetjs1(foobar); }); -suite.add('sheetjs 2', function() { for(var j = 0; j != 1000; ++j) sheetjs2(foobar); }); -suite.run(); + if(i<3) suite.add('sheetjs 1', function() { for(var j = 0; j != m; ++j) sheetjs1(foobar); }); + suite.add('sheetjs 2', function() { for(var j = 0; j != m; ++j) sheetjs2(foobar); }); + suite.add('sheetjs 3', function() { for(var j = 0; j != m; ++j) sheetjs3(foobar); }); + suite.run(); + m>>>=1; if(m < 10) m = 10; +}