From 79a265b6622ec3547a371928e5f1c4714f48e91f Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 12 Jan 2016 22:30:35 -0500 Subject: [PATCH] version bump 0.4.0 - added missing bitshift (fixes #5) - brute-force unicode tests --- .gitignore | 3 +++ .npmignore | 4 ++++ .travis.yml | 1 + LICENSE | 2 +- Makefile | 11 +++++++++-- README.md | 3 +++ bits/00_header.js | 4 ++-- bits/01_version.js | 2 +- bits/40_crc.js | 2 +- crc32.flow.js | 8 ++++---- crc32.js | 8 ++++---- ctest/crc32.js | 8 ++++---- ctest/test.js | 21 ++++++++++++++++++++- misc/make_baseline.sh | 39 +++++++++++++++++++++++++++++++++++++++ misc/make_unicode_crc.njs | 5 +++++ misc/make_unicode_crc.py | 19 +++++++++++++++++++ misc/spin.sh | 2 +- package.json | 3 ++- perf/bm.js | 2 +- test.js | 21 ++++++++++++++++++++- test_files/uccat.txt | 38 ++++++++++++++++++++++++++++++++++++++ 21 files changed, 182 insertions(+), 24 deletions(-) create mode 100755 misc/make_baseline.sh create mode 100644 misc/make_unicode_crc.njs create mode 100644 misc/make_unicode_crc.py create mode 100644 test_files/uccat.txt diff --git a/.gitignore b/.gitignore index 4247ef3..4385eea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ node_modules +test_files/*.py +test_files/*.js +test_files/baseline* misc/coverage.html misc/*/ diff --git a/.npmignore b/.npmignore index d5be5ab..ab99f52 100644 --- a/.npmignore +++ b/.npmignore @@ -3,8 +3,12 @@ misc/ perf/ bits/ ctest/ +test_files/ test.js .travis.yml .jscs.json +.jshintrc +.flowconfig +.npmignore perf.txt Makefile diff --git a/.travis.yml b/.travis.yml index 2faaecc..42fac65 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ node_js: before_install: - "npm install -g npm@next" - "npm install -g mocha crc-32 benchmark ansi" + - "npm install codepage" - "npm install blanket" - "npm install coveralls mocha-lcov-reporter" after_success: diff --git a/LICENSE b/LICENSE index c004c7c..9fd9d56 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2014 SheetJS +Copyright (C) 2014-present SheetJS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/Makefile b/Makefile index b87f11e..4940c9f 100644 --- a/Makefile +++ b/Makefile @@ -20,11 +20,11 @@ bits/01_version.js: package.json echo "CRC32.version = '"`grep version package.json | awk '{gsub(/[^0-9a-z\.-]/,"",$$2); print $$2}'`"';" > $@ .PHONY: clean -clean: +clean: clean-baseline rm -f $(TARGET) .PHONY: test mocha -test mocha: test.js +test mocha: test.js $(TARGET) baseline mocha -R spec -t 20000 .PHONY: ctest @@ -70,3 +70,10 @@ perf: .PHONY: perf-all perf-all: bash misc/perf.sh + +.PHONY: baseline clean-baseline +baseline: + ./misc/make_baseline.sh + +clean-baseline: + rm -f test_files/*.* diff --git a/README.md b/README.md index beb5476..30d5233 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ For example: To run the in-browser tests, run a local server and go to the `ctest` directory. To update the browser artifacts, run `make ctest`. +`make baseline` will generate baseline files based on the unicode mapping at + + ## License Please consult the attached LICENSE file for details. All rights not explicitly diff --git a/bits/00_header.js b/bits/00_header.js index eef3f05..f51169d 100644 --- a/bits/00_header.js +++ b/bits/00_header.js @@ -1,4 +1,4 @@ -/* crc32.js (C) 2014-2015 SheetJS -- http://sheetjs.com */ +/* crc32.js (C) 2014-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ var CRC32; /*:: declare var DO_NOT_EXPORT_CRC: any; */ @@ -14,7 +14,7 @@ var CRC32; return module; }); } else { - factory(CRC32 = {}); + factory(CRC32 = {}); } } else { factory(CRC32 = {}); diff --git a/bits/01_version.js b/bits/01_version.js index c88838f..dad4ead 100644 --- a/bits/01_version.js +++ b/bits/01_version.js @@ -1 +1 @@ -CRC32.version = '0.3.0'; +CRC32.version = '0.4.0'; diff --git a/bits/40_crc.js b/bits/40_crc.js index cf8a719..30954aa 100644 --- a/bits/40_crc.js +++ b/bits/40_crc.js @@ -51,7 +51,7 @@ function crc32_str(str/*:string*/)/*:CRC32Type*/ { c = (c&1023)+64; d = str.charCodeAt(i++) & 1023; crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF]; - crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF]; + crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF]; } else { crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF]; diff --git a/crc32.flow.js b/crc32.flow.js index cd5418a..551605d 100644 --- a/crc32.flow.js +++ b/crc32.flow.js @@ -1,4 +1,4 @@ -/* crc32.js (C) 2014-2015 SheetJS -- http://sheetjs.com */ +/* crc32.js (C) 2014-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ var CRC32; /*:: declare var DO_NOT_EXPORT_CRC: any; */ @@ -14,13 +14,13 @@ var CRC32; return module; }); } else { - factory(CRC32 = {}); + factory(CRC32 = {}); } } else { factory(CRC32 = {}); } }(function(CRC32) { -CRC32.version = '0.3.0'; +CRC32.version = '0.4.0'; /*:: type CRC32Type = number; type ABuf = Array | Buffer; @@ -100,7 +100,7 @@ function crc32_str(str/*:string*/)/*:CRC32Type*/ { c = (c&1023)+64; d = str.charCodeAt(i++) & 1023; crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF]; - crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF]; + crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF]; } else { crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF]; diff --git a/crc32.js b/crc32.js index a1ca898..52a5a68 100644 --- a/crc32.js +++ b/crc32.js @@ -1,4 +1,4 @@ -/* crc32.js (C) 2014-2015 SheetJS -- http://sheetjs.com */ +/* crc32.js (C) 2014-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ var CRC32; (function (factory) { @@ -12,13 +12,13 @@ var CRC32; return module; }); } else { - factory(CRC32 = {}); + factory(CRC32 = {}); } } else { factory(CRC32 = {}); } }(function(CRC32) { -CRC32.version = '0.3.0'; +CRC32.version = '0.4.0'; /* see perf/crc32table.js */ function signed_crc_table() { var c = 0, table = new Array(256); @@ -93,7 +93,7 @@ function crc32_str(str) { c = (c&1023)+64; d = str.charCodeAt(i++) & 1023; crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF]; - crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF]; + crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF]; } else { crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF]; diff --git a/ctest/crc32.js b/ctest/crc32.js index a1ca898..52a5a68 100644 --- a/ctest/crc32.js +++ b/ctest/crc32.js @@ -1,4 +1,4 @@ -/* crc32.js (C) 2014-2015 SheetJS -- http://sheetjs.com */ +/* crc32.js (C) 2014-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ var CRC32; (function (factory) { @@ -12,13 +12,13 @@ var CRC32; return module; }); } else { - factory(CRC32 = {}); + factory(CRC32 = {}); } } else { factory(CRC32 = {}); } }(function(CRC32) { -CRC32.version = '0.3.0'; +CRC32.version = '0.4.0'; /* see perf/crc32table.js */ function signed_crc_table() { var c = 0, table = new Array(256); @@ -93,7 +93,7 @@ function crc32_str(str) { c = (c&1023)+64; d = str.charCodeAt(i++) & 1023; crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF]; - crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF]; + crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF]; crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF]; } else { crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF]; diff --git a/ctest/test.js b/ctest/test.js index aa8f1e7..4697273 100644 --- a/ctest/test.js +++ b/ctest/test.js @@ -5,8 +5,11 @@ if(typeof require !== 'undefined') { describe('source',function(){it('should load',function(){X=require('./');});}); bits = require('./misc/bits.js'); crc32table = require('./misc/table.js'); + fs = require("fs"); } else { X = CRC32; } +function readlines(f) { return fs.readFileSync(f, "ascii").split("\n").filter(function(f) { return !!f; }); } + describe('crc32 table', function() { it('should match fixed table', function() { var badness = 0; @@ -30,4 +33,20 @@ describe('crc32 bits', function() { }); }); }); - +if(typeof require !== 'undefined') describe("unicode", function() { + if(!fs.existsSync("./test_files/uccat.txt")) return;; + var uccat = readlines("./test_files/uccat.txt"); + uccat.forEach(function(cat) { + it("Category " + cat, function() { + if(!fs.existsSync("./test_files/baseline." + cat + ".txt")) return; + var corpus = readlines("./test_files/baseline." + cat + ".txt"); + var uctable = require("./test_files/uctable." + cat + ".js"); + uctable.forEach(function(c, i) { + /* since the baselines are passed via utf8, discard invalid codes */ + if(c.charCodeAt(0) >= 0xD800 && c.charCodeAt(0) < 0xE000) return; + var cc = corpus[i], dd = X.str(c); + assert.equal(dd, cc, ":" + i + ":" + c + ":" + cc + ":" + dd); + }); + }); + }); +}); diff --git a/misc/make_baseline.sh b/misc/make_baseline.sh new file mode 100755 index 0000000..47dbb8c --- /dev/null +++ b/misc/make_baseline.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# make_baseline.sh -- generate baselines for tests +# Copyright (C) 2016-present SheetJS +OUTD=../test_files +CATURL=https://mathias.html5.org/data/unicode/8.0.0/categories/ +CATF=$OUTD/uccat.txt + +ECHORED() { echo -ne '\x1B[0;31m'; echo -n $1; echo -ne '\x1B[0m'; echo; } + +if [ -d misc ]; then cd misc; fi +mkdir -p $OUTD +if [ ! -e $CATF ]; then curl "$CATURL" | grep "code-points" | sed 's/.*="//g;s/-.*//g' > $CATF; fi + +while read line; do + JSF=uctable.${line}.js + PYF=uctable_${line}.py + BLF=baseline.${line}.txt + JSURL="https://mathias.html5.org/data/unicode/format?version=8.0.0&category=${line}&type=symbols&prepend=var+unicode%20%3D%20&append=%3Bif(typeof%20module%20!%3D%3D%20'undefined')%20module.exports%20%3D%20unicode%3B" + if [[ ! -e $OUTD/$JSF || ! -e $OUTD/$PYF || ! -e $OUTD/$BLF ]]; then + ECHORED "Processing ${line}" + if [ ! -e $JSF ]; then + rm -f $PYF $BLF ${PYF}c + echo "Downloading JS" + $PYF + fi + if [ ! -e $BLF ]; then + echo "Building Baseline text" + python make_unicode_crc.py ${line} > baseline.${line}.txt + fi + for i in $JSF $PYF $BLF; do if [ -e $i ]; then mv $i $OUTD/; fi; done + rm -f uctable_${line}.pyc + fi +done < $CATF + diff --git a/misc/make_unicode_crc.njs b/misc/make_unicode_crc.njs new file mode 100644 index 0000000..9d53aa0 --- /dev/null +++ b/misc/make_unicode_crc.njs @@ -0,0 +1,5 @@ +#!/usr/bin/env node +argv = process.argv.slice(2); +var enc = require('codepage').utils.encode; +function arr(x) { return [].slice.call(enc(65001, x)); } +console.log(require('./uctable.' + argv[0]).map(arr)); diff --git a/misc/make_unicode_crc.py b/misc/make_unicode_crc.py new file mode 100644 index 0000000..04137cb --- /dev/null +++ b/misc/make_unicode_crc.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# make_unicode_crc.py -- generate baselines for tests +# Copyright (C) 2016-present SheetJS + +from zlib import crc32 +from array import array +from sys import argv, stderr, exit +from importlib import import_module + +args = argv[1:] + +if len(args) < 1: + print >>stderr, "usage: " + argv[0] + " " + exit(1) + +uctable = import_module("uctable_" + args[0]).uctable + +for z in uctable: + print crc32(array('B', z)); diff --git a/misc/spin.sh b/misc/spin.sh index 9951a57..471dfee 100755 --- a/misc/spin.sh +++ b/misc/spin.sh @@ -1,6 +1,6 @@ #!/bin/bash # spin.sh -- show a spinner (for coverage test) -# Copyright (C) 2014 SheetJS +# Copyright (C) 2014-present SheetJS wpid=$1 delay=1 diff --git a/package.json b/package.json index 384b031..2b7c7fa 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,12 @@ { "name": "crc-32", - "version": "0.3.0", + "version": "0.4.0", "author": "sheetjs", "description": "Pure-JS CRC-32", "keywords": [ "crc32", "checksum", "crc" ], "main": "./crc32", "devDependencies": { + "codepage":"", "mocha":"", "uglify-js":"" }, diff --git a/perf/bm.js b/perf/bm.js index 4dbebc5..e0cf979 100644 --- a/perf/bm.js +++ b/perf/bm.js @@ -1,4 +1,4 @@ -/* bm.js (C) 2014 SheetJS -- http://sheetjs.com */ +/* bm.js (C) 2014-present SheetJS -- http://sheetjs.com */ var Benchmark = require('benchmark'); var c = require('ansi')(process.stdout); diff --git a/test.js b/test.js index aa8f1e7..4697273 100644 --- a/test.js +++ b/test.js @@ -5,8 +5,11 @@ if(typeof require !== 'undefined') { describe('source',function(){it('should load',function(){X=require('./');});}); bits = require('./misc/bits.js'); crc32table = require('./misc/table.js'); + fs = require("fs"); } else { X = CRC32; } +function readlines(f) { return fs.readFileSync(f, "ascii").split("\n").filter(function(f) { return !!f; }); } + describe('crc32 table', function() { it('should match fixed table', function() { var badness = 0; @@ -30,4 +33,20 @@ describe('crc32 bits', function() { }); }); }); - +if(typeof require !== 'undefined') describe("unicode", function() { + if(!fs.existsSync("./test_files/uccat.txt")) return;; + var uccat = readlines("./test_files/uccat.txt"); + uccat.forEach(function(cat) { + it("Category " + cat, function() { + if(!fs.existsSync("./test_files/baseline." + cat + ".txt")) return; + var corpus = readlines("./test_files/baseline." + cat + ".txt"); + var uctable = require("./test_files/uctable." + cat + ".js"); + uctable.forEach(function(c, i) { + /* since the baselines are passed via utf8, discard invalid codes */ + if(c.charCodeAt(0) >= 0xD800 && c.charCodeAt(0) < 0xE000) return; + var cc = corpus[i], dd = X.str(c); + assert.equal(dd, cc, ":" + i + ":" + c + ":" + cc + ":" + dd); + }); + }); + }); +}); diff --git a/test_files/uccat.txt b/test_files/uccat.txt new file mode 100644 index 0000000..63fe097 --- /dev/null +++ b/test_files/uccat.txt @@ -0,0 +1,38 @@ +C +Cc +Cf +Cn +Co +Cs +L +LC +Ll +Lm +Lo +Lt +Lu +M +Mc +Me +Mn +N +Nd +Nl +No +P +Pc +Pd +Pe +Pf +Pi +Po +Ps +S +Sc +Sk +Sm +So +Z +Zl +Zp +Zs