version bump 1.15.0: added 28591 to cpexcel
This commit is contained in:
parent
ff3c114453
commit
6d1f7cc2d6
3
.github/FUNDING.yml
vendored
Normal file
3
.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
github: SheetJSDev
|
||||
custom: https://sheetjs.com
|
||||
open_collective: s5s
|
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
node_modules
|
||||
package-lock.json
|
||||
*.tgz
|
||||
codepages/
|
||||
.vocrc
|
||||
misc/coverage.html
|
||||
codepage_mini.md
|
||||
ctest/sauce*
|
@ -22,6 +22,7 @@ Wansung
|
||||
halfwidth
|
||||
|
||||
# Third-party
|
||||
ECMA-35
|
||||
FoxPro
|
||||
nodejs
|
||||
unicode.org
|
||||
|
24
Makefile
24
Makefile
@ -3,8 +3,11 @@ LIB=codepage
|
||||
VOC=voc
|
||||
TARGET=cptable.js
|
||||
AUXTARGETS=cputils.js cpexcel.js sbcs.js
|
||||
DISTFULL=cpexcel sbcs cptable
|
||||
CMDS=bin/codepage.njs
|
||||
DISTFULL_A=cpexcel sbcs
|
||||
DISTFULL_B=cptable
|
||||
CMDS=
|
||||
#DISTBITS=iso2022.js
|
||||
DISTBITS=
|
||||
|
||||
ULIB=$(shell echo $(LIB) | tr a-z A-Z)
|
||||
FLOWTARGET=cptable.js
|
||||
@ -16,14 +19,16 @@ CLOSURE=/usr/local/lib/node_modules/google-closure-compiler/compiler.jar
|
||||
all: voc ## Build library and auxiliary scripts
|
||||
|
||||
.PHONY: voc
|
||||
voc test.js: codepage.md
|
||||
$(VOC) codepage.md
|
||||
voc: codepage.md
|
||||
@make js
|
||||
#$(VOC) codepage.md
|
||||
|
||||
.PHONY: js
|
||||
js: make.sh codepage.md ## Build all output targets
|
||||
bash make.sh <(awk -F, '$$3=="1"' pages.csv) sbcs.js cptable
|
||||
bash make.sh excel.csv cpexcel.js cptable
|
||||
bash make.sh
|
||||
bash make.sh pages.csv cptable.js cptable
|
||||
#node iso2022/make_iso2022.njs > iso2022.js
|
||||
make cputils.js
|
||||
|
||||
cputils.js: cputils.flow.js
|
||||
@ -31,17 +36,18 @@ cputils.js: cputils.flow.js
|
||||
|
||||
.PHONY: clean
|
||||
clean: ## Remove targets and build artifacts
|
||||
rm -f make.sh .vocrc pages.csv bits/*.js
|
||||
rm -f bits/*.js
|
||||
|
||||
.PHONY: dist
|
||||
dist: $(TARGET) $(AUXTARGETS) ## Copy files for distribution
|
||||
cp $(TARGET) $(AUXTARGETS) LICENSE dist/
|
||||
for i in $(DISTFULL); do cat $$i.js cputils.js | sed "s#require('./cptable')#cptable#" > dist/$$i.full.js; done
|
||||
for i in $(DISTFULL_A); do cat $$i.js cputils.js | sed "s#require('./cptable')#cptable#" > dist/$$i.full.js; done
|
||||
for i in $(DISTFULL_B); do cat $$i.js $(DISTBITS) cputils.js | sed "s#require('./cptable')#cptable#" > dist/$$i.full.js; done
|
||||
|
||||
## Testing
|
||||
|
||||
.PHONY: test mocha
|
||||
test mocha: test.js $(TARGET) baseline ## Run test suite
|
||||
test mocha: $(TARGET) baseline ## Run test suite
|
||||
mocha -R spec -t 20000
|
||||
|
||||
.PHONY: ctest
|
||||
@ -67,7 +73,7 @@ fullint: lint old-lint tslint flow mdlint ## Run all checks
|
||||
|
||||
.PHONY: lint
|
||||
lint: $(TARGET) $(AUXTARGETS) ## Run eslint checks
|
||||
@eslint --ext .js,.njs,.json,.html,.htm $(TARGET) $(AUXTARGETS) $(CMDS) $(HTMLLINT) package.json bower.json
|
||||
@eslint --ext .js,.njs,.json,.html,.htm $(TARGET) $(AUXTARGETS) $(CMDS) $(HTMLLINT) package.json
|
||||
if [ -e $(CLOSURE) ]; then java -jar $(CLOSURE) $(REQS) $(FLOWTARGET) --jscomp_warning=reportUnknownTypes >/dev/null; fi
|
||||
|
||||
.PHONY: old-lint
|
||||
|
17
README.md
17
README.md
@ -26,7 +26,7 @@ Alternatively, use the full version in the dist folder:
|
||||
```
|
||||
|
||||
The complete set of codepages is large due to some Double Byte Character Set
|
||||
encodings. A much smaller file that just includes SBCS codepages is provided in
|
||||
encodings. A much smaller file that only includes SBCS codepages is provided in
|
||||
this repo (`sbcs.js`), as well as a file for other projects (`cpexcel.js`).
|
||||
|
||||
If you know which codepages you need, you can include individual scripts for
|
||||
@ -133,7 +133,7 @@ to produce a complete script like `cpexcel.full.js`.
|
||||
## Building the complete script
|
||||
|
||||
This script uses [voc](https://npm.im/voc). The script to build the codepage tables and
|
||||
the JS source is `codepage.md`, so building is as simple as `voc codepage.md`.
|
||||
the JS source is `codepage.md`, so building involves `voc codepage.md`.
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
@ -274,11 +274,11 @@ tables are not generated, there is no corresponding entry (they are "magic").
|
||||
| `29001` | Windows 7 | Europa 3 |
|
||||
| `38598` | Windows 7 | ISO 8859-8 Hebrew (ISO-Logical) |
|
||||
| `47451` | unicode.org | Atari ST/TT |
|
||||
| `50220` | Windows 7 | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | Windows 7 | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | Windows 7 | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | Windows 7 | ISO 2022 Korean |
|
||||
| `50227` | Windows 7 | ISO 2022 Simplified Chinese |
|
||||
| `50220` | magic | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | magic | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | magic | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | magic | ISO 2022 Korean |
|
||||
| `50227` | magic | ISO 2022 Simplified Chinese |
|
||||
| `51932` | Windows 7 | EUC Japanese |
|
||||
| `51936` | Windows 7 | EUC Simplified Chinese |
|
||||
| `51949` | Windows 7 | EUC Korean |
|
||||
@ -330,6 +330,9 @@ To update the browser artifacts, run `make ctest`.
|
||||
- [Windows Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx)
|
||||
- [Windows Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
|
||||
- [IBM Coded Character Sets](https://www-01.ibm.com/software/globalization/ccsid/ccsid_registered.html)
|
||||
- [ISO/IEC 2022 / ECMA-35](https://www.ecma-international.org/publications/files/ECMA-ST/Ecma-035.pdf)
|
||||
- [International Register of Coded Character Sets To Be Used With Escape Sequences](https://www.itscj.ipsj.or.jp/itscj_english/iso-ir/ISO-IR.pdf)
|
||||
- [Japanese Character Encoding for Internet Messages](https://tools.ietf.org/html/rfc1468)
|
||||
|
||||
## License
|
||||
|
||||
|
671
codepage.md
671
codepage.md
@ -2,78 +2,14 @@
|
||||
|
||||
The fields of the `pages.csv` manifest are `codepage,url,bytes` (SBCS=1, DBCS=2)
|
||||
|
||||
```>pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
863,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT,1
|
||||
864,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT,1
|
||||
865,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT,1
|
||||
866,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT,1
|
||||
869,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT,1
|
||||
874,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT,1
|
||||
875,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT,1
|
||||
932,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT,2
|
||||
936,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT,2
|
||||
949,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT,2
|
||||
950,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT,2
|
||||
1026,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT,1
|
||||
1250,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT,1
|
||||
1251,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT,1
|
||||
1252,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT,1
|
||||
1253,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT,1
|
||||
1254,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT,1
|
||||
1255,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT,1
|
||||
1256,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT,1
|
||||
1257,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT,1
|
||||
1258,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT,1
|
||||
47451,http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT,1
|
||||
```
|
||||
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private `0xF8FF` code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the Apple behavior, in which case the files are under APPLE and not
|
||||
MICSFT. Codepages are an absolute pain :/
|
||||
|
||||
```>pages.csv
|
||||
10000,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT,1
|
||||
10006,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT,1
|
||||
10007,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT,1
|
||||
10029,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT,1
|
||||
10079,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT,1
|
||||
10081,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT,1
|
||||
```
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081.
|
||||
|
||||
The numbering scheme for the `ISO-8859-X` series is `28590 + X`:
|
||||
|
||||
```>pages.csv
|
||||
28591,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT,1
|
||||
28592,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT,1
|
||||
28593,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT,1
|
||||
28594,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT,1
|
||||
28595,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT,1
|
||||
28596,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT,1
|
||||
28597,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT,1
|
||||
28598,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT,1
|
||||
28599,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT,1
|
||||
28600,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT,1
|
||||
28601,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT,1
|
||||
28603,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT,1
|
||||
28604,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT,1
|
||||
28605,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT,1
|
||||
28606,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT,1
|
||||
```
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
@ -142,11 +78,6 @@ The following codepages are available in .NET on Windows:
|
||||
- 21866 Ukrainian (KOI8-U); Cyrillic (KOI8-U)
|
||||
- 29001 Europa 3
|
||||
- 38598 ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
||||
- 50220 ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
||||
- 50221 ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS Allow 1 byte Kana)
|
||||
- 50222 ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS Allow 1 byte Kana - SO/SI)
|
||||
- 50225 ISO 2022 Korean
|
||||
- 50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
||||
- 51932 EUC Japanese
|
||||
- 51936 EUC Simplified Chinese; Chinese Simplified (EUC)
|
||||
- 51949 EUC Korean
|
||||
@ -163,107 +94,11 @@ The following codepages are available in .NET on Windows:
|
||||
- 57010 ISCII Gujarati
|
||||
- 57011 ISCII Punjabi
|
||||
|
||||
```>pages.csv
|
||||
708,,1
|
||||
720,,1
|
||||
808,,1
|
||||
858,,1
|
||||
870,,1
|
||||
872,,1
|
||||
1010,,1
|
||||
1047,,1
|
||||
1132,,1
|
||||
1140,,1
|
||||
1141,,1
|
||||
1142,,1
|
||||
1143,,1
|
||||
1144,,1
|
||||
1145,,1
|
||||
1146,,1
|
||||
1147,,1
|
||||
1148,,1
|
||||
1149,,1
|
||||
1361,,2
|
||||
10001,,2
|
||||
10002,,2
|
||||
10003,,2
|
||||
10004,,1
|
||||
10005,,1
|
||||
10008,,2
|
||||
10010,,1
|
||||
10017,,1
|
||||
10021,,1
|
||||
10082,,1
|
||||
20000,,2
|
||||
20001,,2
|
||||
20002,,2
|
||||
20003,,2
|
||||
20004,,2
|
||||
20005,,2
|
||||
20105,,1
|
||||
20106,,1
|
||||
20107,,1
|
||||
20108,,1
|
||||
20261,,2
|
||||
20269,,1
|
||||
20273,,1
|
||||
20277,,1
|
||||
20278,,1
|
||||
20280,,1
|
||||
20284,,1
|
||||
20285,,1
|
||||
20290,,1
|
||||
20297,,1
|
||||
20420,,1
|
||||
20423,,1
|
||||
20424,,1
|
||||
20833,,1
|
||||
20838,,1
|
||||
20866,,1
|
||||
20871,,1
|
||||
20880,,1
|
||||
20905,,1
|
||||
20924,,1
|
||||
20932,,2
|
||||
20936,,2
|
||||
20949,,2
|
||||
21025,,1
|
||||
21027,,1
|
||||
21866,,1
|
||||
29001,,1
|
||||
38598,,1
|
||||
50220,,2
|
||||
50221,,2
|
||||
50222,,2
|
||||
50225,,2
|
||||
50227,,2
|
||||
51932,,2
|
||||
51936,,2
|
||||
51949,,2
|
||||
52936,,2
|
||||
54936,,2
|
||||
57002,,2
|
||||
57003,,2
|
||||
57004,,2
|
||||
57005,,2
|
||||
57006,,2
|
||||
57007,,2
|
||||
57008,,2
|
||||
57009,,2
|
||||
57010,,2
|
||||
57011,,2
|
||||
```
|
||||
|
||||
The following codepages are dependencies for Visual FoxPro:
|
||||
|
||||
- 620 Mazovia (Polish) MS-DOS
|
||||
- 895 Kamenický (Czech) MS-DOS
|
||||
|
||||
```>pages.csv
|
||||
620,,1
|
||||
895,,1
|
||||
```
|
||||
|
||||
## Building Notes
|
||||
|
||||
The script `make.sh` (described later) will get these files and massage the data
|
||||
@ -289,13 +124,7 @@ which implies that code `0xF6` is `String.fromCharCode(0x02C6)` and vice versa.
|
||||
|
||||
To build the sources on windows, consult `dotnet/MakeEncoding.cs`.
|
||||
|
||||
After saving the standard output to `out`, a simple script processes the result:
|
||||
|
||||
```>dotnet.sh
|
||||
#!/bin/bash
|
||||
if [ ! -e dotnet/out ]; then exit; fi
|
||||
<dotnet/out tr -s ' ' '\t' | awk 'NF>2 {if(outfile) close(outfile); outfile="codepages/" $1 ".TBL"} NF==2 {print > outfile}'
|
||||
```
|
||||
After saving standard output to `out`, the `dotnet.sh` script processes results.
|
||||
|
||||
# Building the script
|
||||
|
||||
@ -304,43 +133,9 @@ generates JS code for encoding and decoding:
|
||||
|
||||
## Raw Codepages
|
||||
|
||||
```>make.njs
|
||||
#!/usr/bin/env node
|
||||
var argv = process.argv.slice(1), fs = require('fs');
|
||||
if(argv.length < 2) {
|
||||
console.error("usage: make.njs <codepage_index> [variable]");
|
||||
process.exit(22); /* EINVAL */
|
||||
}
|
||||
|
||||
var cp/*:string*/ = argv[1];
|
||||
var jsvar/*:string*/ = argv[2] || "cptable";
|
||||
var x/*:string*/ = fs.readFileSync("codepages/" + cp + ".TBL","utf8");
|
||||
var maxcp = 0, i = 0, ii = 0;
|
||||
|
||||
var y/*:Array<Array<number> >*/ = x.split("\n").map(function(z/*:string*/)/*:Array<number>*/ {
|
||||
var w/*:Array<string>*/ = z.split("\t");
|
||||
if(w.length < 2) return [Number(w[0])];
|
||||
return [Number(w[0]), Number(w[1])];
|
||||
}).filter(function(z) { return z.length > 1; });
|
||||
```
|
||||
|
||||
The DBCS and SBCS code generation strategies are different. The maximum code is
|
||||
used to distinguish the two (max `0xFF` for SBCS).
|
||||
|
||||
```
|
||||
for(i = 0; i != y.length; ++i) if(y[i][0] > maxcp) maxcp = y[i][0];
|
||||
|
||||
var enc/*:{[key:string]:number}*/ = {}, dec/*:{[key:string]:string}|Array<string>*/ = (maxcp < 256 ? [] : {});
|
||||
for(i = 0; i != y.length; ++i) {
|
||||
/*:: if(Array.isArray(dec)) */ dec[y[i][0]] = String.fromCharCode(y[i][1]);
|
||||
enc[String.fromCharCode(y[i][1])] = y[i][0];
|
||||
}
|
||||
|
||||
var odec = "", outstr = "";
|
||||
if(maxcp < 256) {
|
||||
/*:: if(Array.isArray(dec)) { */
|
||||
```
|
||||
|
||||
The Unicode character `0xFFFD` (REPLACEMENT CHARACTER) is used as a placeholder
|
||||
for characters that are not specified in the map (for example, `0xF0` is not in
|
||||
code page 10000).
|
||||
@ -348,14 +143,6 @@ code page 10000).
|
||||
For SBCS, the idea is to embed a raw string with the contents of the 256 codes.
|
||||
The `dec` field is merely a split of the string, and `enc` is an eversion:
|
||||
|
||||
```
|
||||
for(i = 0; i != 256; ++i) if(typeof dec[i] === "undefined") dec[i] = String.fromCharCode(0xFFFD);
|
||||
odec = JSON.stringify(dec.join(""));
|
||||
outstr = '(function(){ var d = ' + odec + ', D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();';
|
||||
/*:: } */
|
||||
} else {
|
||||
```
|
||||
|
||||
DBCS is similar, except that the space is sliced in chunks of 256 bytes (strings
|
||||
are only generated for those high-bytes represented in the codepage).
|
||||
|
||||
@ -363,27 +150,6 @@ The strategy is to construct an array-of-arrays so that `dd[high][low]` is the
|
||||
character associated with the code. This array is combined at runtime to yield
|
||||
the complete decoding object (and the encoding object is an eversion):
|
||||
|
||||
```
|
||||
var dd = [];
|
||||
/*:: if(!Array.isArray(dec)) { */
|
||||
for(i in dec) if(dec.hasOwnProperty(i)) {
|
||||
ii = +i;
|
||||
if(typeof dd[ii >> 8] === "undefined") dd[ii >> 8] = [];
|
||||
dd[ii >> 8][ii % 256] = dec[i];
|
||||
}
|
||||
/*:: } */
|
||||
outstr = '(function(){ var d = [], e = {}, D = [], j;\n';
|
||||
for(var i = 0; i != 256; ++i) if(dd[i]) {
|
||||
for(var j = 0; j != 256; ++j) if(typeof dd[i][j] === "undefined") dd[i][j] = String.fromCharCode(0xFFFD);
|
||||
outstr += 'D[' + i + '] = ' + JSON.stringify(dd[i].join("")) + '.split("");\n';
|
||||
outstr += 'for(j = 0; j != D[' + i + '].length; ++j) if(D[' + i + '][j].charCodeAt(0) !== 0xFFFD) { e[D[' + i + '][j]] = ' + (i*256) + ' + j; d[' + (i*256) + ' + j] = D[' + i + '][j];}\n'
|
||||
}
|
||||
outstr += 'return {"enc": e, "dec": d }; })();';
|
||||
}
|
||||
process.stdout.write(jsvar + "[" + cp + "] = " + outstr + "\n");
|
||||
|
||||
```
|
||||
|
||||
`make.sh` generates the tables used by `make.njs`. The raw Unicode TXT files
|
||||
are columnar: `code unicode #comments`. For example, the last 10 lines of the
|
||||
text file `ROMAN.TXT` (for CP 10000) are:
|
||||
@ -404,35 +170,6 @@ text file `ROMAN.TXT` (for CP 10000) are:
|
||||
In processing the data, the comments (after the `#`) are stripped and undefined
|
||||
elements (like `0x7F` for CP 10000) are removed.
|
||||
|
||||
```>make.sh
|
||||
#!/bin/bash
|
||||
INFILE=${1:-pages.csv}
|
||||
OUTFILE=${2:-cptable.js}
|
||||
JSVAR=${3:-cptable}
|
||||
VERSION=$(cat package.json | grep version | tr -dc [0-9.])
|
||||
|
||||
mkdir -p codepages bits
|
||||
rm -f $OUTFILE $OUTFILE.tmp
|
||||
echo "/* $OUTFILE (C) 2013-present SheetJS -- http://sheetjs.com */" > $OUTFILE.tmp
|
||||
echo "/*jshint -W100 */" >> $OUTFILE.tmp
|
||||
echo "var $JSVAR = {version:\"$VERSION\"};" >> $OUTFILE.tmp
|
||||
if [ -e dotnet.sh ]; then bash dotnet.sh; fi
|
||||
awk -F, '{print $1, $2, $3}' $INFILE | while read cp url cptype; do
|
||||
echo $cp $url
|
||||
if [ ! -e codepages/$cp.TBL ]; then
|
||||
curl $url | sed 's/#.*//g' | awk 'NF==2' > codepages/$cp.TBL
|
||||
fi
|
||||
echo "if(typeof $JSVAR === 'undefined') $JSVAR = {};" > bits/$cp.js.tmp
|
||||
node make.njs $cp $JSVAR | tee -a bits/$cp.js.tmp >> $OUTFILE.tmp
|
||||
sed 's/"\([0-9]+\)":/\1:/g' <bits/$cp.js.tmp >bits/$cp.js
|
||||
rm -f bits/$cp.js.tmp
|
||||
done
|
||||
echo "// eslint-disable-next-line no-undef" >> $OUTFILE.tmp
|
||||
echo "if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = $JSVAR;" >> $OUTFILE.tmp
|
||||
sed 's/"\([0-9]+\)":/\1:/g' <$OUTFILE.tmp >$OUTFILE
|
||||
rm -f $OUTFILE.tmp
|
||||
```
|
||||
|
||||
## Utilities
|
||||
|
||||
The encode and decode functions are kept in a separate script (`cputils.js`).
|
||||
@ -446,412 +183,8 @@ Both encode and decode deal with data represented as:
|
||||
The `ofmt` variable controls `encode` output (`str`, `arr` respectively)
|
||||
while the input format is automatically determined.
|
||||
|
||||
# Tests
|
||||
|
||||
```>test.js
|
||||
var fs = require('fs'), assert = require('assert'), vm = require('vm');
|
||||
var cptable, sbcs;
|
||||
|
||||
```
|
||||
|
||||
Due to a bug in `Buffer.from` in node `4.0 - 4.4`, a special check is needed:
|
||||
|
||||
```>test.js
|
||||
var Buffer_from = function(){};
|
||||
|
||||
if(typeof Buffer !== 'undefined') {
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
The tests include JS validity tests (requiring or evaluating code):
|
||||
|
||||
```>test.js
|
||||
describe('source', function() {
|
||||
it('should load node', function() { cptable = require('./'); });
|
||||
it('should load sbcs', function() { sbcs = require('./sbcs'); });
|
||||
it('should load excel', function() { excel = require('./cpexcel'); });
|
||||
it('should process bits', function() {
|
||||
var files = fs.readdirSync('bits').filter(function(x){return x.substr(-3)==".js";});
|
||||
files.forEach(function(x) {
|
||||
vm.runInThisContext(fs.readFileSync('./bits/' + x));
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The README tests verify the snippets in the README:
|
||||
|
||||
```>test.js
|
||||
describe('README', function() {
|
||||
var readme = function() {
|
||||
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ
|
||||
assert.equal(unicode_cp10000_255, "ˇ");
|
||||
|
||||
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255
|
||||
assert.equal(cp10000_711, 255);
|
||||
|
||||
var b1 = [0xbb,0xe3,0xd7,0xdc];
|
||||
var s1 = b1.map(function(x) { return String.fromCharCode(x); }).join("");
|
||||
var 汇总 = cptable.utils.decode(936, b1);
|
||||
var buf = cptable.utils.encode(936, 汇总);
|
||||
assert.equal(汇总,"汇总");
|
||||
assert.equal(buf.length, 4);
|
||||
for(var i = 0; i != 4; ++i) assert.equal(b1[i], buf[i]);
|
||||
|
||||
var b2 = [0xf0,0x9f,0x8d,0xa3];
|
||||
var sushi= cptable.utils.decode(65001, b2);
|
||||
var sbuf = cptable.utils.encode(65001, sushi);
|
||||
assert.equal(sushi,"🍣");
|
||||
assert.equal(sbuf.length, 4);
|
||||
for(var i = 0; i != 4; ++i) assert.equal(b2[i], sbuf[i]);
|
||||
|
||||
};
|
||||
it('should be correct', function() {
|
||||
cptable.utils.cache.encache();
|
||||
readme();
|
||||
cptable.utils.cache.decache();
|
||||
readme();
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The consistency tests make sure that encoding and decoding are pseudo inverses:
|
||||
|
||||
```>test.js
|
||||
describe('consistency', function() {
|
||||
cptable = require('./');
|
||||
U = cptable.utils;
|
||||
var chk = function(cptable, cacheit) { return function(x) {
|
||||
it('should consistently process CP ' + x, function() {
|
||||
var cp = cptable[x], D = cp.dec, E = cp.enc;
|
||||
if(cacheit) cptable.utils.cache.encache();
|
||||
else cptable.utils.cache.decache();
|
||||
Object.keys(D).forEach(function(d) {
|
||||
if(E[D[d]] != d) {
|
||||
if(typeof E[D[d]] !== "undefined") return;
|
||||
if(D[d].charCodeAt(0) == 0xFFFD) return;
|
||||
if(D[E[D[d]]] === D[d]) return;
|
||||
throw new Error(x + " e.d[" + d + "] = " + E[D[d]] + "; d[" + d + "]=" + D[d] + "; d.e.d[" + d + "] = " + D[E[D[d]]]);
|
||||
}
|
||||
});
|
||||
Object.keys(E).forEach(function(e) {
|
||||
if(D[E[e]] != e) {
|
||||
throw new Error(x + " d.e[" + e + "] = " + D[E[e]] + "; e[" + e + "]=" + E[e] + "; e.d.e[" + e + "] = " + E[D[E[e]]]);
|
||||
}
|
||||
});
|
||||
var corpus = ["foobar"];
|
||||
corpus.forEach(function(w){
|
||||
assert.equal(U.decode(x,U.encode(x,w)),w);
|
||||
});
|
||||
cptable.utils.cache.encache();
|
||||
});
|
||||
}; };
|
||||
describe('cached', function() {
|
||||
Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, true));
|
||||
});
|
||||
describe('direct', function() {
|
||||
Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, false));
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The next tests look at possible entry conditions:
|
||||
|
||||
```
|
||||
describe('entry conditions', function() {
|
||||
it('should fail to load utils if cptable unavailable', function() {
|
||||
var sandbox = {};
|
||||
var ctx = vm.createContext(sandbox);
|
||||
assert.throws(function() {
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
|
||||
});
|
||||
});
|
||||
it('should load utils if cptable is available', function() {
|
||||
var sandbox = {};
|
||||
var ctx = vm.createContext(sandbox);
|
||||
vm.runInContext(fs.readFileSync('cpexcel.js','utf8'),ctx);
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
|
||||
});
|
||||
var chken = function(cp, i) {
|
||||
var c = function(cp, i, e) {
|
||||
var str = cptable.utils.encode(cp,i,e);
|
||||
var arr = cptable.utils.encode(cp,i.split(""),e);
|
||||
assert.deepEqual(str,arr);
|
||||
if(typeof Buffer === 'undefined') return;
|
||||
var buf = cptable.utils.encode(cp,Buffer_from(i),e);
|
||||
assert.deepEqual(str,buf);
|
||||
};
|
||||
cptable.utils.cache.encache();
|
||||
c(cp,i);
|
||||
c(cp,i,'buf');
|
||||
c(cp,i,'arr');
|
||||
c(cp,i,'str');
|
||||
cptable.utils.cache.decache();
|
||||
c(cp,i);
|
||||
c(cp,i,'buf');
|
||||
c(cp,i,'arr');
|
||||
c(cp,i,'str');
|
||||
};
|
||||
describe('encode', function() {
|
||||
it('CP 1252 : sbcs', function() { chken(1252,"foo•bþr"); });
|
||||
it('CP 708 : sbcs', function() { chken(708,"ت and ث smiley faces");});
|
||||
it('CP 936 : dbcs', function() { chken(936, "这是中文字符测试");});
|
||||
});
|
||||
var chkde = function(cp, i) {
|
||||
var c = function(cp, i) {
|
||||
var s;
|
||||
if(typeof Buffer !== 'undefined' && i instanceof Buffer) s = [].map.call(i, function(s){return String.fromCharCode(s); });
|
||||
else s=(i.map) ? i.map(function(s){return String.fromCharCode(s); }) : i;
|
||||
var str = cptable.utils.decode(cp,i);
|
||||
var arr = cptable.utils.decode(cp,s.join?s.join(""):s);
|
||||
assert.deepEqual(str,arr);
|
||||
if(typeof Buffer === 'undefined') return;
|
||||
var buf = cptable.utils.decode(cp,Buffer_from(i));
|
||||
assert.deepEqual(str,buf);
|
||||
};
|
||||
cptable.utils.cache.encache();
|
||||
c(cp,i);
|
||||
cptable.utils.cache.decache();
|
||||
c(cp,i);
|
||||
};
|
||||
describe('decode', function() {
|
||||
it('CP 1252 : sbcs', function() { chkde(1252,[0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72]); }); /* "foobar" */
|
||||
if(typeof Buffer !== 'undefined') it('CP 708 : sbcs', function() { chkde(708, Buffer_from([0xca, 0x20, 0x61, 0x6e, 0x64, 0x20, 0xcb, 0x20, 0x73, 0x6d, 0x69, 0x6c, 0x65, 0x79, 0x20, 0x66, 0x61, 0x63, 0x65, 0x73])); }); /* ("ت and ث smiley faces") */
|
||||
it('CP 936 : dbcs', function() { chkde(936, [0xd5, 0xe2, 0xca, 0xc7, 0xd6, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xb7, 0xfb, 0xb2, 0xe2, 0xca, 0xd4]);}); /* "这是中文字符测试" */
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The `testfile` helper function reads a file and compares to node's read facilities:
|
||||
|
||||
```>test.js
|
||||
function testfile(f,cp,type,skip) {
|
||||
var d = fs.readFileSync(f);
|
||||
var x = fs.readFileSync(f, type);
|
||||
var a = x.split("");
|
||||
var chk = function(cp) {
|
||||
var y = cptable.utils.decode(cp, d);
|
||||
assert.equal(x,y);
|
||||
var z = cptable.utils.encode(cp, x);
|
||||
if(z.length != d.length) throw new Error(f + " " + JSON.stringify(z) + " != " + JSON.stringify(d) + " : " + z.length + " " + d.length);
|
||||
for(var i = 0; i != d.length; ++i) if(d[i] !== z[i]) throw new Error("" + i + " " + d[i] + "!=" + z[i]);
|
||||
if(skip) return;
|
||||
z = cptable.utils.encode(cp, a);
|
||||
if(z.length != d.length) throw new Error(f + " " + JSON.stringify(z) + " != " + JSON.stringify(d) + " : " + z.length + " " + d.length);
|
||||
for(var i = 0; i != d.length; ++i) if(d[i] !== z[i]) throw new Error("" + i + " " + d[i] + "!=" + z[i]);
|
||||
if(f.indexOf("cptable.js") == -1) {
|
||||
cptable.utils.encode(cp, d, 'str');
|
||||
cptable.utils.encode(cp, d, 'arr');
|
||||
}
|
||||
}
|
||||
cptable.utils.cache.encache();
|
||||
chk(cp);
|
||||
if(skip) return;
|
||||
cptable.utils.cache.decache();
|
||||
chk(cp);
|
||||
cptable.utils.cache.encache();
|
||||
}
|
||||
```
|
||||
|
||||
The `utf8` tests verify UTF-8 encoding of the actual JS sources:
|
||||
|
||||
```>test.js
|
||||
describe('node natives', function() {
|
||||
var node = [[65001, 'utf8',1], [1200, 'utf16le',1], [20127, 'ascii',0]];
|
||||
var unicodefiles = ['codepage.md','README.md','cptable.js'];
|
||||
var asciifiles = ['cputils.js'];
|
||||
node.forEach(function(w) {
|
||||
describe(w[1], function() {
|
||||
cptable = require('./');
|
||||
asciifiles.forEach(function(f) {
|
||||
it('should process ' + f, function() { testfile('./misc/'+f+'.'+w[1],w[0],w[1]); });
|
||||
});
|
||||
if(!w[2]) return;
|
||||
unicodefiles.forEach(function(f) {
|
||||
it('should process ' + f, function() { testfile('./misc/'+f+'.'+w[1],w[0],w[1]); });
|
||||
});
|
||||
if(w[1] === 'utf8') it('should process bits', function() {
|
||||
var files = fs.readdirSync('bits').filter(function(x){return x.substr(-3)==".js";});
|
||||
files.forEach(function(f) { testfile('./bits/' + f,w[0],w[1],true); });
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The `utf*` and `ascii` tests attempt to test other magic formats:
|
||||
|
||||
```>test.js
|
||||
var m = cptable.utils.magic;
|
||||
function cmp(x,z) {
|
||||
assert.equal(x.length, z.length);
|
||||
for(var i = 0; i != z.length; ++i) assert.equal(i+"/"+x.length+""+x[i], i+"/"+z.length+""+z[i]);
|
||||
}
|
||||
/*
 * Round-trip tests for every entry of the magic table `m`, except 16969
 * (the 'failures' suite expects encode/decode to throw for that entry).
 *
 * For each magic name, two fixtures under ./misc are processed when present:
 * codepage.md.<name> and README.md.<name>.  When a fixture file is missing,
 * `null` is passed to `it` instead of a test body
 * (NOTE(review): mocha is expected to report such a test as pending).
 *
 * `t` is a key from Object.keys and therefore a string, which is why the
 * numeric comparisons below use the loose `!=` operator.
 */
Object.keys(m).forEach(function(t){if(t != 16969) describe(m[t], function() {
	/* Read a UTF-8 reference fixture; for "ascii", strip every char >= U+0080. */
	function reference(path) {
		var b = fs.readFileSync(path, "utf8");
		// `+` rather than `*`: same result, without an empty match at every position
		if(m[t] === "ascii") b = b.replace(/[\u0080-\uffff]+/g,"");
		return b;
	}

	it("should process codepage.md." + m[t], fs.existsSync('./misc/codepage.md.' + m[t]) ?
		function() {
			var b = reference('./misc/codepage.md.utf8');
			var x = fs.readFileSync('./misc/codepage.md.' + m[t]);
			var y;

			/* Decode the fixture, check it against the reference, then re-encode. */
			function roundtrip() {
				y = cptable.utils.decode(t, x);
				assert.equal(y,b);
				var z = cptable.utils.encode(t, y);
				// for 65000 the encoded bytes are not compared with the fixture;
				// the output only has to decode back to the same text
				if(t != 65000) cmp(x,z);
				else { assert.equal(y, cptable.utils.decode(t, z)); }
			}

			/* Exercise the 'str' and 'arr' output types; only absence of errors is checked. */
			function outputs() {
				cptable.utils.encode(t, y, 'str');
				cptable.utils.encode(t, y, 'arr');
			}

			// every step runs once after encache() and once after decache()
			cptable.utils.cache.encache();
			roundtrip();
			cptable.utils.cache.decache();
			roundtrip();
			cptable.utils.cache.encache();
			outputs();
			cptable.utils.cache.decache();
			outputs();
			cptable.utils.cache.encache();
		}
	: null);

	it("should process README.md." + m[t], fs.existsSync('./misc/README.md.' + m[t]) ?
		function() {
			var b = reference('./misc/README.md.utf8');
			// copy the file contents into a plain Array so decode is exercised with array input
			var x = [].slice.call(fs.readFileSync('./misc/README.md.' + m[t]));
			cptable.utils.cache.encache();
			assert.equal(cptable.utils.decode(t, x), b);
			cptable.utils.cache.decache();
			assert.equal(cptable.utils.decode(t, x), b);
			cptable.utils.cache.encache();
		}
	: null);
});});
|
||||
```
|
||||
|
||||
The codepage `6969` is not defined, so operations should fail:
|
||||
|
||||
```>test.js
|
||||
/*
 * Negative tests: every operation below is expected to throw, apart from the
 * two hascp() checks, which only report whether a codepage number is known.
 */
describe('failures', function() {
	it('should fail to find CP 6969', function() {
		// reading a property of the missing table entry is what throws
		assert.throws(function() { return cptable[6969].dec; });
		assert.throws(function() { return cptable[6969].enc; });
	});

	it('should fail using utils', function() {
		var known = cptable.utils.hascp(6969);
		assert(!known);
		assert.throws(function() {
			return cptable.utils.encode(6969, "foobar");
		});
		assert.throws(function() {
			return cptable.utils.decode(6969, [0x20]);
		});
	});

	it('should fail with black magic', function() {
		// 16969 is reported as available, yet encoding and decoding must throw
		var known = cptable.utils.hascp(16969);
		assert(known);
		assert.throws(function() {
			return cptable.utils.encode(16969, "foobar");
		});
		assert.throws(function() {
			return cptable.utils.decode(16969, [0x20]);
		});
	});

	it('should fail when presented with invalid char codes', function() {
		// NOTE(review): 20127 is presumably a 7-bit codepage, so U+00AA has no mapping -- confirm
		assert.throws(function() {
			cptable.utils.cache.decache();
			var input = [String.fromCharCode(0xAA)];
			return cptable.utils.encode(20127, input);
		});
	});

	it('should fail to propagate UTF8 BOM in UTF7', function() {
		var samples = ["+/v8-abc", "+/v9"];
		samples.forEach(function(sample) {
			// the decode/encode round trip must either throw or yield a different string
			assert.throws(function() {
				var decoded = cptable.utils.decode(65000, sample);
				assert.equal(sample, cptable.utils.encode(65000, decoded));
			});
		});
	});
});
|
||||
```
|
||||
|
||||
# Nitty Gritty
|
||||
|
||||
```json>package.json
|
||||
{
|
||||
"name": "codepage",
|
||||
"version": "1.14.0",
|
||||
"author": "SheetJS",
|
||||
"description": "pure-JS library to handle codepages",
|
||||
"keywords": [ "codepage", "iconv", "convert", "strings" ],
|
||||
"bin": {
|
||||
"codepage": "./bin/codepage.njs"
|
||||
},
|
||||
"main": "cputils.js",
|
||||
"types": "types",
|
||||
"browser": {
|
||||
"buffer": "false"
|
||||
},
|
||||
"dependencies": {
|
||||
"commander": "~2.14.1",
|
||||
"exit-on-epipe": "~1.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"voc": "~1.1.0",
|
||||
"mocha": "~2.5.3",
|
||||
"blanket": "~1.2.3",
|
||||
"@sheetjs/uglify-js": "~2.7.3",
|
||||
"@types/node": "^8.0.7",
|
||||
"@types/commander": "^2.12.0",
|
||||
"dtslint": "^0.1.2",
|
||||
"typescript": "2.2.0"
|
||||
},
|
||||
"repository": { "type":"git", "url":"git://github.com/SheetJS/js-codepage.git"},
|
||||
"scripts": {
|
||||
"pretest": "git submodule init && git submodule update",
|
||||
"test": "make test",
|
||||
"build": "make js",
|
||||
"lint": "make fullint",
|
||||
"dtslint": "dtslint types"
|
||||
},
|
||||
"config": {
|
||||
"blanket": {
|
||||
"pattern": "[cputils.js]"
|
||||
}
|
||||
},
|
||||
"alex": {
|
||||
"allow": [
|
||||
"chinese",
|
||||
"european",
|
||||
"german",
|
||||
"japanese",
|
||||
"latin"
|
||||
]
|
||||
},
|
||||
"homepage": "http://sheetjs.com/opensource",
|
||||
"files": [
|
||||
"LICENSE",
|
||||
"README.md",
|
||||
"bin",
|
||||
"bits/*.js",
|
||||
"types/index.d.ts",
|
||||
"types/*.json",
|
||||
"cptable.js",
|
||||
"cputils.js",
|
||||
"dist/sbcs.full.js",
|
||||
"dist/cpexcel.full.js"
|
||||
],
|
||||
"bugs": { "url": "https://github.com/SheetJS/js-codepage/issues" },
|
||||
"license": "Apache-2.0",
|
||||
"engines": { "node": ">=0.8" }
|
||||
}
|
||||
```
|
||||
|
||||
```>.vocrc
|
||||
{ "post": "make js" }
|
||||
```
|
||||
|
||||
```>.gitignore
|
||||
node_modules
|
||||
package-lock.json
|
||||
*.tgz
|
||||
.gitignore
|
||||
codepages/
|
||||
.vocrc
|
||||
make.sh
|
||||
make.njs
|
||||
misc/coverage.html
|
||||
codepage_mini.md
|
||||
ctest/sauce*
|
||||
```
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[620] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàąçêëèïîćÄĄĘęłôöĆûùŚÖÜ¢Ł¥śƒŹŻóÓńŃźż¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[737] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρσςτυφχψ░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ωάέήϊίόύϋώΆΈΉΊΌΎΏ±≥≤ΪΫ÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
@ -973,5 +973,6 @@ return {"enc": e, "dec": d }; })();
|
||||
cptable[10029] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10079] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûüÝ°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄¤ÐðÞþý·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10081] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸĞğİıŞş‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙ<C39B>ˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[28591] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
// eslint-disable-next-line no-undef
|
||||
if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = cptable;
|
||||
|
1700
cptable.js
1700
cptable.js
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
/*::
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
|
191
dist/LICENSE
vendored
191
dist/LICENSE
vendored
@ -1,4 +1,192 @@
|
||||
Copyright (C) 2013-present SheetJS
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright (C) 2013-present SheetJS LLC
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@ -11,4 +199,3 @@ Copyright (C) 2013-present SheetJS
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
7
dist/cpexcel.full.js
vendored
7
dist/cpexcel.full.js
vendored
@ -1,6 +1,6 @@
|
||||
/* cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[620] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàąçêëèïîćÄĄĘęłôöĆûùŚÖÜ¢Ł¥śƒŹŻóÓńŃźż¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[737] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρσςτυφχψ░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ωάέήϊίόύϋώΆΈΉΊΌΎΏ±≥≤ΪΫ÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
@ -973,9 +973,10 @@ return {"enc": e, "dec": d }; })();
|
||||
cptable[10029] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10079] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûüÝ°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄¤ÐðÞþý·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10081] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸĞğİıŞş‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙ<C39B>ˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[28591] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
// eslint-disable-next-line no-undef
|
||||
if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = cptable;
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
|
5
dist/cpexcel.js
vendored
5
dist/cpexcel.js
vendored
@ -1,6 +1,6 @@
|
||||
/* cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[620] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàąçêëèïîćÄĄĘęłôöĆûùŚÖÜ¢Ł¥śƒŹŻóÓńŃźż¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[737] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρσςτυφχψ░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ωάέήϊίόύϋώΆΈΉΊΌΎΏ±≥≤ΪΫ÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
@ -973,5 +973,6 @@ return {"enc": e, "dec": d }; })();
|
||||
cptable[10029] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10079] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûüÝ°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄¤ÐðÞþý·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[10081] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸĞğİıŞş‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ<C393>ÒÚÛÙ<C39B>ˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[28591] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
// eslint-disable-next-line no-undef
|
||||
if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = cptable;
|
||||
|
1702
dist/cptable.full.js
vendored
1702
dist/cptable.full.js
vendored
File diff suppressed because it is too large
Load Diff
1700
dist/cptable.js
vendored
1700
dist/cptable.js
vendored
File diff suppressed because it is too large
Load Diff
2
dist/cputils.js
vendored
2
dist/cputils.js
vendored
@ -1,4 +1,4 @@
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
|
6
dist/sbcs.full.js
vendored
6
dist/sbcs.full.js
vendored
@ -1,6 +1,6 @@
|
||||
/* sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[37] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[500] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
@ -109,7 +109,7 @@ cptable[620] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\
|
||||
cptable[895] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ČüéďäĎŤčěĚĹÍľǪÄÁÉžŽôöÓůÚýÖÜŠĽÝŘťáíóúňŇŮÔšřŕŔ¼§«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
// eslint-disable-next-line no-undef
|
||||
if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = cptable;
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
|
4
dist/sbcs.js
vendored
4
dist/sbcs.js
vendored
@ -1,6 +1,6 @@
|
||||
/* sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[37] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[500] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
|
@ -29,3 +29,4 @@
|
||||
10029,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT,1
|
||||
10079,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT,1
|
||||
10081,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT,1
|
||||
28591,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT,1
|
||||
|
|
@ -1,4 +1,6 @@
|
||||
<!DOCTYPE html>
|
||||
<!-- codepage (C) 2013-present SheetJS http://sheetjs.com -->
|
||||
<!-- vim: set ts=2: -->
|
||||
<html>
|
||||
<head>
|
||||
<title>js-codepage tests</title>
|
||||
|
51
make.njs
Normal file
51
make.njs
Normal file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env node
/* make.njs -- print the JS assignment that defines one codepage table.
 *
 * usage: make.njs <codepage_index> [variable]
 *
 * Reads codepages/<codepage_index>.TBL, whose useful rows are two tab-separated
 * numbers (code, then character code), and writes
 * `<variable>[<codepage_index>] = <table expression>` to standard output.
 */
var fs = require('fs');
var args/*:Array<string>*/ = process.argv.slice(1);
if(args.length < 2) {
	console.error("usage: make.njs <codepage_index> [variable]");
	process.exit(22); /* EINVAL */
}

var cp/*:string*/ = args[1];
var jsvar/*:string*/ = args[2] || "cptable";

/* U+FFFD stands in for every code the table does not define */
var FILLER/*:string*/ = String.fromCharCode(0xFFFD);

/* fill the holes among the first 256 slots and return the row as one string */
function pad256(row/*:Array<string>*/)/*:string*/ {
	for(var k = 0; k != 256; ++k) if(typeof row[k] === "undefined") row[k] = FILLER;
	return row.join("");
}

/* rows with fewer than two tab-separated fields are dropped */
var pairs/*:Array<Array<number> >*/ = [];
fs.readFileSync("codepages/" + cp + ".TBL", "utf8").split("\n").forEach(function(row/*:string*/) {
	var cols/*:Array<string>*/ = row.split("\t");
	if(cols.length >= 2) pairs.push([Number(cols[0]), Number(cols[1])]);
});

/* the largest code picks the strategy: anything below 256 takes the single-string path */
var maxcp/*:number*/ = pairs.reduce(function(best/*:number*/, pair/*:Array<number>*/) {
	return pair[0] > best ? pair[0] : best;
}, 0);

var dec/*:{[key:string]:string}|Array<string>*/ = (maxcp < 256 ? [] : {});
pairs.forEach(function(pair/*:Array<number>*/) {
	dec[pair[0]] = String.fromCharCode(pair[1]);
});

var outstr = "";
if(maxcp < 256) {
	/* one 256-character string; the emitted code rebuilds both maps from it at load time */
	outstr = '(function(){ var d = ' + JSON.stringify(pad256(dec)) + ', D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();';
} else {
	/* rows[high][low] holds the character for code (high << 8) + low */
	var rows/*:Array<Array<string> >*/ = [];
	Object.keys(dec).forEach(function(key/*:string*/) {
		var code = +key, hi = code >> 8;
		if(typeof rows[hi] === "undefined") rows[hi] = [];
		rows[hi][code % 256] = dec[key];
	});

	/* only the high bytes that actually occur get a string in the output */
	var parts/*:Array<string>*/ = ['(function(){ var d = [], e = {}, D = [], j;\n'];
	for(var hi = 0; hi != 256; ++hi) {
		if(!rows[hi]) continue;
		var base = hi * 256;
		parts.push('D[' + hi + '] = ' + JSON.stringify(pad256(rows[hi])) + '.split("");\n');
		parts.push('for(j = 0; j != D[' + hi + '].length; ++j) if(D[' + hi + '][j].charCodeAt(0) !== 0xFFFD) { e[D[' + hi + '][j]] = ' + base + ' + j; d[' + base + ' + j] = D[' + hi + '][j];}\n');
	}
	parts.push('return {"enc": e, "dec": d }; })();');
	outstr = parts.join("");
}
process.stdout.write(jsvar + "[" + cp + "] = " + outstr + "\n");
|
||||
|
26
make.sh
Normal file
26
make.sh
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
# make.sh -- build one codepage bundle plus the per-codepage scripts in bits/
#
# usage: make.sh [manifest.csv] [output.js] [jsvar]
#   manifest.csv  comma-separated rows whose first two fields are codepage,url
#                 (default pages.csv)
#   output.js     bundle to write (default cptable.js)
#   jsvar         name of the JS variable that receives the tables (default cptable)
INFILE=${1:-pages.csv}
OUTFILE=${2:-cptable.js}
JSVAR=${3:-cptable}
# first "version" line of package.json, reduced to digits and dots; the set is
# quoted so the shell can never expand it as a filename pattern
VERSION=$(grep -m1 '"version"' package.json | tr -dc '0-9.')

mkdir -p codepages bits
rm -f "$OUTFILE" "$OUTFILE.tmp"
{
	echo "/*! $OUTFILE (C) 2013-present SheetJS -- http://sheetjs.com */"
	echo "/*jshint -W100 */"
	echo "var $JSVAR = {version:\"$VERSION\"};"
} > "$OUTFILE.tmp"
if [ -e dotnet.sh ]; then bash dotnet.sh; fi

# Split each row on commas directly. Splitting on whitespace collapses an empty
# URL field, which shifts the third field into $url.
# The `|| [ -n "$cp" ]` keeps a final row that lacks a trailing newline.
while IFS=, read -r cp url cptype || [ -n "$cp" ]; do
	[ -n "$cp" ] || continue
	echo "$cp $url"
	if [ ! -e "codepages/$cp.TBL" ]; then
		if [ -z "$url" ]; then
			echo "make.sh: no URL and no cached table for codepage $cp; skipping" >&2
			continue
		fi
		# strip comments, keep two-field rows; go through a scratch file so a
		# failed transfer never leaves an empty table behind to be reused
		curl -f "$url" | sed 's/#.*//g' | awk 'NF==2' > "codepages/$cp.TBL.tmp"
		if [ ! -s "codepages/$cp.TBL.tmp" ]; then
			rm -f "codepages/$cp.TBL.tmp"
			echo "make.sh: could not fetch a table for codepage $cp from $url; skipping" >&2
			continue
		fi
		mv "codepages/$cp.TBL.tmp" "codepages/$cp.TBL"
	fi
	echo "if(typeof $JSVAR === 'undefined') $JSVAR = {};" > "bits/$cp.js.tmp"
	node make.njs "$cp" "$JSVAR" | tee -a "bits/$cp.js.tmp" >> "$OUTFILE.tmp"
	# NOTE(review): in basic regular expressions `+` is a literal plus sign, so
	# this pattern presumably never matches; left untouched to keep output stable
	sed 's/"\([0-9]+\)":/\1:/g' < "bits/$cp.js.tmp" > "bits/$cp.js"
	rm -f "bits/$cp.js.tmp"
done < "$INFILE"

{
	echo "// eslint-disable-next-line no-undef"
	echo "if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = $JSVAR;"
} >> "$OUTFILE.tmp"
sed 's/"\([0-9]+\)":/\1:/g' < "$OUTFILE.tmp" > "$OUTFILE"
rm -f "$OUTFILE.tmp"
|
@ -26,7 +26,7 @@ Alternatively, use the full version in the dist folder:
|
||||
```
|
||||
|
||||
The complete set of codepages is large due to some Double Byte Character Set
|
||||
encodings. A much smaller file that just includes SBCS codepages is provided in
|
||||
encodings. A much smaller file that only includes SBCS codepages is provided in
|
||||
this repo (`sbcs.js`), as well as a file for other projects (`cpexcel.js`)
|
||||
|
||||
If you know which codepages you need, you can include individual scripts for
|
||||
@ -133,7 +133,7 @@ to produce a complete script like `cpexcel.full.js`.
|
||||
## Building the complete script
|
||||
|
||||
This script uses [voc](https://npm.im/voc). The script to build the codepage tables and
|
||||
the JS source is `codepage.md`, so building is as simple as `voc codepage.md`.
|
||||
the JS source is `codepage.md`, so building involves `voc codepage.md`.
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
@ -274,11 +274,11 @@ tables are not generated, there is no corresponding entry (they are "magic").
|
||||
| `29001` | Windows 7 | Europa 3 |
|
||||
| `38598` | Windows 7 | ISO 8859-8 Hebrew (ISO-Logical) |
|
||||
| `47451` | unicode.org | Atari ST/TT |
|
||||
| `50220` | Windows 7 | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | Windows 7 | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | Windows 7 | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | Windows 7 | ISO 2022 Korean |
|
||||
| `50227` | Windows 7 | ISO 2022 Simplified Chinese |
|
||||
| `50220` | magic | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | magic | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | magic | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | magic | ISO 2022 Korean |
|
||||
| `50227` | magic | ISO 2022 Simplified Chinese |
|
||||
| `51932` | Windows 7 | EUC Japanese |
|
||||
| `51936` | Windows 7 | EUC Simplified Chinese |
|
||||
| `51949` | Windows 7 | EUC Korean |
|
||||
@ -330,6 +330,9 @@ To update the browser artifacts, run `make ctest`.
|
||||
- [Windows Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx)
|
||||
- [Windows Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
|
||||
- [IBM Coded Character Sets](https://www-01.ibm.com/software/globalization/ccsid/ccsid_registered.html)
|
||||
- [ISO/IEC 2022 / ECMA-35](https://www.ecma-international.org/publications/files/ECMA-ST/Ecma-035.pdf)
|
||||
- [International Register of Coded Character Sets To Be Used With Escape Sequences](https://www.itscj.ipsj.or.jp/itscj_english/iso-ir/ISO-IR.pdf)
|
||||
- [Japanese Character Encoding for Internet Messages](https://tools.ietf.org/html/rfc1468)
|
||||
|
||||
## License
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -26,7 +26,7 @@ Alternatively, use the full version in the dist folder:
|
||||
+AGAAYABg
|
||||
|
||||
The complete set of codepages is large due to some Double Byte Character Set
|
||||
encodings. A much smaller file that just includes SBCS codepages is provided in
|
||||
encodings. A much smaller file that only includes SBCS codepages is provided in
|
||||
this repo (+AGA-sbcs.js+AGA), as well as a file for other projects (+AGA-cpexcel.js+AGA)
|
||||
|
||||
If you know which codepages you need, you can include individual scripts for
|
||||
@ -133,7 +133,7 @@ to produce a complete script like +AGA-cpexcel.full.js+AGA.
|
||||
+ACMAIw Building the complete script
|
||||
|
||||
This script uses +AFs-voc+AF0(npm.im/voc). The script to build the codepage tables and
|
||||
the JS source is +AGA-codepage.md+AGA, so building is as simple as +AGA-voc codepage.md+AGA.
|
||||
the JS source is +AGA-codepage.md+AGA, so building involves +AGA-voc codepage.md+AGA.
|
||||
|
||||
+ACMAIw Generated Codepages
|
||||
|
||||
@ -274,11 +274,11 @@ tables are not generated, there is no corresponding entry (they are +ACI-magic+A
|
||||
+AHw +AGA-29001+AGA +AHw Windows 7 +AHw Europa 3 +AHw
|
||||
+AHw +AGA-38598+AGA +AHw Windows 7 +AHw ISO 8859-8 Hebrew (ISO-Logical) +AHw
|
||||
+AHw +AGA-47451+AGA +AHw unicode.org +AHw Atari ST/TT +AHw
|
||||
+AHw +AGA-50220+AGA +AHw Windows 7 +AHw ISO 2022 JIS Japanese with no halfwidth Katakana +AHw
|
||||
+AHw +AGA-50221+AGA +AHw Windows 7 +AHw ISO 2022 JIS Japanese with halfwidth Katakana +AHw
|
||||
+AHw +AGA-50222+AGA +AHw Windows 7 +AHw ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)+AHw
|
||||
+AHw +AGA-50225+AGA +AHw Windows 7 +AHw ISO 2022 Korean +AHw
|
||||
+AHw +AGA-50227+AGA +AHw Windows 7 +AHw ISO 2022 Simplified Chinese +AHw
|
||||
+AHw +AGA-50220+AGA +AHw magic +AHw ISO 2022 JIS Japanese with no halfwidth Katakana +AHw
|
||||
+AHw +AGA-50221+AGA +AHw magic +AHw ISO 2022 JIS Japanese with halfwidth Katakana +AHw
|
||||
+AHw +AGA-50222+AGA +AHw magic +AHw ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)+AHw
|
||||
+AHw +AGA-50225+AGA +AHw magic +AHw ISO 2022 Korean +AHw
|
||||
+AHw +AGA-50227+AGA +AHw magic +AHw ISO 2022 Simplified Chinese +AHw
|
||||
+AHw +AGA-51932+AGA +AHw Windows 7 +AHw EUC Japanese +AHw
|
||||
+AHw +AGA-51936+AGA +AHw Windows 7 +AHw EUC Simplified Chinese +AHw
|
||||
+AHw +AGA-51949+AGA +AHw Windows 7 +AHw EUC Korean +AHw
|
||||
@ -330,6 +330,9 @@ To update the browser artifacts, run +AGA-make ctest+AGA.
|
||||
- +AFs-Windows Code Page Enumeration+AF0(http://msdn.microsoft.com/en-us/library/cc195051.aspx)
|
||||
- +AFs-Windows Code Page Identifiers+AF0(http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
|
||||
- +AFs-IBM Coded Character Sets+AF0(https://www-01.ibm.com/software/globalization/ccsid/ccsid+AF8-registered.html)
|
||||
- +AFs-ISO/IEC 2022 / ECMA-35+AF0(https://www.ecma-international.org/publications/files/ECMA-ST/Ecma-035.pdf)
|
||||
- +AFs-International Register of Coded Character Sets To Be Used With Escape Sequences+AF0(https://www.itscj.ipsj.or.jp/itscj+AF8-english/iso-ir/ISO-IR.pdf)
|
||||
- +AFs-Japanese Character Encoding for Internet Messages+AF0(https://tools.ietf.org/html/rfc1468)
|
||||
|
||||
+ACMAIw License
|
||||
|
||||
|
@ -26,7 +26,7 @@ Alternatively, use the full version in the dist folder:
|
||||
```
|
||||
|
||||
The complete set of codepages is large due to some Double Byte Character Set
|
||||
encodings. A much smaller file that just includes SBCS codepages is provided in
|
||||
encodings. A much smaller file that only includes SBCS codepages is provided in
|
||||
this repo (`sbcs.js`), as well as a file for other projects (`cpexcel.js`)
|
||||
|
||||
If you know which codepages you need, you can include individual scripts for
|
||||
@ -133,7 +133,7 @@ to produce a complete script like `cpexcel.full.js`.
|
||||
## Building the complete script
|
||||
|
||||
This script uses [voc](npm.im/voc). The script to build the codepage tables and
|
||||
the JS source is `codepage.md`, so building is as simple as `voc codepage.md`.
|
||||
the JS source is `codepage.md`, so building involves `voc codepage.md`.
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
@ -274,11 +274,11 @@ tables are not generated, there is no corresponding entry (they are "magic").
|
||||
| `29001` | Windows 7 | Europa 3 |
|
||||
| `38598` | Windows 7 | ISO 8859-8 Hebrew (ISO-Logical) |
|
||||
| `47451` | unicode.org | Atari ST/TT |
|
||||
| `50220` | Windows 7 | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | Windows 7 | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | Windows 7 | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | Windows 7 | ISO 2022 Korean |
|
||||
| `50227` | Windows 7 | ISO 2022 Simplified Chinese |
|
||||
| `50220` | magic | ISO 2022 JIS Japanese with no halfwidth Katakana |
|
||||
| `50221` | magic | ISO 2022 JIS Japanese with halfwidth Katakana |
|
||||
| `50222` | magic | ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI)|
|
||||
| `50225` | magic | ISO 2022 Korean |
|
||||
| `50227` | magic | ISO 2022 Simplified Chinese |
|
||||
| `51932` | Windows 7 | EUC Japanese |
|
||||
| `51936` | Windows 7 | EUC Simplified Chinese |
|
||||
| `51949` | Windows 7 | EUC Korean |
|
||||
@ -330,6 +330,9 @@ To update the browser artifacts, run `make ctest`.
|
||||
- [Windows Code Page Enumeration](http://msdn.microsoft.com/en-us/library/cc195051.aspx)
|
||||
- [Windows Code Page Identifiers](http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx)
|
||||
- [IBM Coded Character Sets](https://www-01.ibm.com/software/globalization/ccsid/ccsid_registered.html)
|
||||
- [ISO/IEC 2022 / ECMA-35](https://www.ecma-international.org/publications/files/ECMA-ST/Ecma-035.pdf)
|
||||
- [International Register of Coded Character Sets To Be Used With Escape Sequences](https://www.itscj.ipsj.or.jp/itscj_english/iso-ir/ISO-IR.pdf)
|
||||
- [Japanese Character Encoding for Internet Messages](https://tools.ietf.org/html/rfc1468)
|
||||
|
||||
## License
|
||||
|
||||
|
@ -2,78 +2,14 @@
|
||||
|
||||
The fields of the `pages.csv` manifest are `codepage,url,bytes` (SBCS=1, DBCS=2)
|
||||
|
||||
```>pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
863,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT,1
|
||||
864,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT,1
|
||||
865,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT,1
|
||||
866,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT,1
|
||||
869,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT,1
|
||||
874,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT,1
|
||||
875,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT,1
|
||||
932,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT,2
|
||||
936,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT,2
|
||||
949,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT,2
|
||||
950,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT,2
|
||||
1026,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT,1
|
||||
1250,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT,1
|
||||
1251,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT,1
|
||||
1252,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT,1
|
||||
1253,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT,1
|
||||
1254,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT,1
|
||||
1255,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT,1
|
||||
1256,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT,1
|
||||
1257,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT,1
|
||||
1258,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT,1
|
||||
47451,http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT,1
|
||||
```
|
||||
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private `0xF8FF` code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the Apple behavior, in which case the files are under APPLE and not
|
||||
MICSFT. Codepages are an absolute pain :/
|
||||
|
||||
```>pages.csv
|
||||
10000,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT,1
|
||||
10006,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT,1
|
||||
10007,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT,1
|
||||
10029,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT,1
|
||||
10079,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT,1
|
||||
10081,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT,1
|
||||
```
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, and 10081.
|
||||
|
||||
The numbering scheme for the `ISO-8859-X` series is `28590 + X`:
|
||||
|
||||
```>pages.csv
|
||||
28591,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT,1
|
||||
28592,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT,1
|
||||
28593,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT,1
|
||||
28594,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT,1
|
||||
28595,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT,1
|
||||
28596,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT,1
|
||||
28597,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT,1
|
||||
28598,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT,1
|
||||
28599,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT,1
|
||||
28600,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT,1
|
||||
28601,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT,1
|
||||
28603,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT,1
|
||||
28604,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT,1
|
||||
28605,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT,1
|
||||
28606,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT,1
|
||||
```
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
@ -142,11 +78,6 @@ The following codepages are available in .NET on Windows:
|
||||
- 21866 Ukrainian (KOI8-U); Cyrillic (KOI8-U)
|
||||
- 29001 Europa 3
|
||||
- 38598 ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
||||
- 50220 ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
||||
- 50221 ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS Allow 1 byte Kana)
|
||||
- 50222 ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS Allow 1 byte Kana - SO/SI)
|
||||
- 50225 ISO 2022 Korean
|
||||
- 50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
||||
- 51932 EUC Japanese
|
||||
- 51936 EUC Simplified Chinese; Chinese Simplified (EUC)
|
||||
- 51949 EUC Korean
|
||||
@ -163,107 +94,11 @@ The following codepages are available in .NET on Windows:
|
||||
- 57010 ISCII Gujarati
|
||||
- 57011 ISCII Punjabi
|
||||
|
||||
```>pages.csv
|
||||
708,,1
|
||||
720,,1
|
||||
808,,1
|
||||
858,,1
|
||||
870,,1
|
||||
872,,1
|
||||
1010,,1
|
||||
1047,,1
|
||||
1132,,1
|
||||
1140,,1
|
||||
1141,,1
|
||||
1142,,1
|
||||
1143,,1
|
||||
1144,,1
|
||||
1145,,1
|
||||
1146,,1
|
||||
1147,,1
|
||||
1148,,1
|
||||
1149,,1
|
||||
1361,,2
|
||||
10001,,2
|
||||
10002,,2
|
||||
10003,,2
|
||||
10004,,1
|
||||
10005,,1
|
||||
10008,,2
|
||||
10010,,1
|
||||
10017,,1
|
||||
10021,,1
|
||||
10082,,1
|
||||
20000,,2
|
||||
20001,,2
|
||||
20002,,2
|
||||
20003,,2
|
||||
20004,,2
|
||||
20005,,2
|
||||
20105,,1
|
||||
20106,,1
|
||||
20107,,1
|
||||
20108,,1
|
||||
20261,,2
|
||||
20269,,1
|
||||
20273,,1
|
||||
20277,,1
|
||||
20278,,1
|
||||
20280,,1
|
||||
20284,,1
|
||||
20285,,1
|
||||
20290,,1
|
||||
20297,,1
|
||||
20420,,1
|
||||
20423,,1
|
||||
20424,,1
|
||||
20833,,1
|
||||
20838,,1
|
||||
20866,,1
|
||||
20871,,1
|
||||
20880,,1
|
||||
20905,,1
|
||||
20924,,1
|
||||
20932,,2
|
||||
20936,,2
|
||||
20949,,2
|
||||
21025,,1
|
||||
21027,,1
|
||||
21866,,1
|
||||
29001,,1
|
||||
38598,,1
|
||||
50220,,2
|
||||
50221,,2
|
||||
50222,,2
|
||||
50225,,2
|
||||
50227,,2
|
||||
51932,,2
|
||||
51936,,2
|
||||
51949,,2
|
||||
52936,,2
|
||||
54936,,2
|
||||
57002,,2
|
||||
57003,,2
|
||||
57004,,2
|
||||
57005,,2
|
||||
57006,,2
|
||||
57007,,2
|
||||
57008,,2
|
||||
57009,,2
|
||||
57010,,2
|
||||
57011,,2
|
||||
```
|
||||
|
||||
The following codepages are dependencies for Visual FoxPro:
|
||||
|
||||
- 620 Mazovia (Polish) MS-DOS
|
||||
- 895 Kamenick (Czech) MS-DOS
|
||||
|
||||
```>pages.csv
|
||||
620,,1
|
||||
895,,1
|
||||
```
|
||||
|
||||
## Building Notes
|
||||
|
||||
The script `make.sh` (described later) will get these files and massage the data
|
||||
@ -289,13 +124,7 @@ which implies that code `0xF6` is `String.fromCharCode(0x02C6)` and vice versa.
|
||||
|
||||
To build the sources on windows, consult `dotnet/MakeEncoding.cs`.
|
||||
|
||||
After saving the standard output to `out`, a simple script processes the result:
|
||||
|
||||
```>dotnet.sh
|
||||
#!/bin/bash
# dotnet.sh -- split dotnet/out into one codepages/<id>.TBL file per codepage

# nothing to do when the dump is absent
[ -e dotnet/out ] || exit 0

# Spaces become tabs (repeats squeezed). A line with more than two fields starts
# a new table named after its first field; every two-field line is written to
# the table that is currently open.
tr -s ' ' '\t' < dotnet/out | awk '
	NF>2 {
		if(outfile) close(outfile)
		outfile="codepages/" $1 ".TBL"
	}
	NF==2 {
		print > outfile
	}
'
|
||||
```
|
||||
After saving standard output to `out`, the `dotnet.sh` script processes results.
|
||||
|
||||
# Building the script
|
||||
|
||||
@ -304,43 +133,9 @@ generates JS code for encoding and decoding:
|
||||
|
||||
## Raw Codepages
|
||||
|
||||
```>make.njs
|
||||
#!/usr/bin/env node
|
||||
var argv = process.argv.slice(1), fs = require('fs');
|
||||
if(argv.length < 2) {
|
||||
console.error("usage: make.njs <codepage_index> [variable]");
|
||||
process.exit(22); /* EINVAL */
|
||||
}
|
||||
|
||||
var cp/*:string*/ = argv[1];
|
||||
var jsvar/*:string*/ = argv[2] || "cptable";
|
||||
var x/*:string*/ = fs.readFileSync("codepages/" + cp + ".TBL","utf8");
|
||||
var maxcp = 0, i = 0, ii = 0;
|
||||
|
||||
var y/*:Array<Array<number> >*/ = x.split("\n").map(function(z/*:string*/)/*:Array<number>*/ {
|
||||
var w/*:Array<string>*/ = z.split("\t");
|
||||
if(w.length < 2) return [Number(w[0])];
|
||||
return [Number(w[0]), Number(w[1])];
|
||||
}).filter(function(z) { return z.length > 1; });
|
||||
```
|
||||
|
||||
The DBCS and SBCS code generation strategies are different. The maximum code is
|
||||
used to distinguish (max `0xFF` for SBCS).
|
||||
|
||||
```
|
||||
for(i = 0; i != y.length; ++i) if(y[i][0] > maxcp) maxcp = y[i][0];
|
||||
|
||||
var enc/*:{[key:string]:number}*/ = {}, dec/*:{[key:string]:string}|Array<string>*/ = (maxcp < 256 ? [] : {});
|
||||
for(i = 0; i != y.length; ++i) {
|
||||
/*:: if(Array.isArray(dec)) */ dec[y[i][0]] = String.fromCharCode(y[i][1]);
|
||||
enc[String.fromCharCode(y[i][1])] = y[i][0];
|
||||
}
|
||||
|
||||
var odec = "", outstr = "";
|
||||
if(maxcp < 256) {
|
||||
/*:: if(Array.isArray(dec)) { */
|
||||
```
|
||||
|
||||
The Unicode character `0xFFFD` (REPLACEMENT CHARACTER) is used as a placeholder
|
||||
for characters that are not specified in the map (for example, `0xF0` is not in
|
||||
code page 10000).
|
||||
@ -348,14 +143,6 @@ code page 10000).
|
||||
For SBCS, the idea is to embed a raw string with the contents of the 256 codes.
|
||||
The `dec` field is merely a split of the string, and `enc` is the inverse mapping (character back to code):
|
||||
|
||||
```
|
||||
for(i = 0; i != 256; ++i) if(typeof dec[i] === "undefined") dec[i] = String.fromCharCode(0xFFFD);
|
||||
odec = JSON.stringify(dec.join(""));
|
||||
outstr = '(function(){ var d = ' + odec + ', D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();';
|
||||
/*:: } */
|
||||
} else {
|
||||
```
|
||||
|
||||
DBCS is similar, except that the space is sliced in chunks of 256 bytes (strings
|
||||
are only generated for those high-bytes represented in the codepage).
|
||||
|
||||
@ -363,27 +150,6 @@ The strategy is to construct an array-of-arrays so that `dd[high][low]` is the
|
||||
character associated with the code. This array is combined at runtime to yield
|
||||
the complete decoding object (and the encoding object is an eversion):
|
||||
|
||||
```
|
||||
var dd = [];
|
||||
/*:: if(!Array.isArray(dec)) { */
|
||||
for(i in dec) if(dec.hasOwnProperty(i)) {
|
||||
ii = +i;
|
||||
if(typeof dd[ii >> 8] === "undefined") dd[ii >> 8] = [];
|
||||
dd[ii >> 8][ii % 256] = dec[i];
|
||||
}
|
||||
/*:: } */
|
||||
outstr = '(function(){ var d = [], e = {}, D = [], j;\n';
|
||||
for(var i = 0; i != 256; ++i) if(dd[i]) {
|
||||
for(var j = 0; j != 256; ++j) if(typeof dd[i][j] === "undefined") dd[i][j] = String.fromCharCode(0xFFFD);
|
||||
outstr += 'D[' + i + '] = ' + JSON.stringify(dd[i].join("")) + '.split("");\n';
|
||||
outstr += 'for(j = 0; j != D[' + i + '].length; ++j) if(D[' + i + '][j].charCodeAt(0) !== 0xFFFD) { e[D[' + i + '][j]] = ' + (i*256) + ' + j; d[' + (i*256) + ' + j] = D[' + i + '][j];}\n'
|
||||
}
|
||||
outstr += 'return {"enc": e, "dec": d }; })();';
|
||||
}
|
||||
process.stdout.write(jsvar + "[" + cp + "] = " + outstr + "\n");
|
||||
|
||||
```
|
||||
|
||||
`make.sh` generates the tables used by `make.njs`. The raw Unicode TXT files
|
||||
are columnar: `code unicode #comments`. For example, the last 10 lines of the
|
||||
text file `ROMAN.TXT` (for CP 10000) are:
|
||||
@ -404,35 +170,6 @@ text file `ROMAN.TXT` (for CP 10000) are:
|
||||
In processing the data, the comments (after the `#`) are stripped and undefined
|
||||
elements (like `0x7F` for CP 10000) are removed.
|
||||
|
||||
```>make.sh
|
||||
#!/bin/bash
# make.sh -- build one codepage bundle plus the per-codepage scripts in bits/
#
# usage: make.sh [manifest.csv] [output.js] [jsvar]
#   manifest.csv  comma-separated rows whose first two fields are codepage,url
#                 (default pages.csv)
#   output.js     bundle to write (default cptable.js)
#   jsvar         name of the JS variable that receives the tables (default cptable)
INFILE=${1:-pages.csv}
OUTFILE=${2:-cptable.js}
JSVAR=${3:-cptable}
# first "version" line of package.json, reduced to digits and dots; the set is
# quoted so the shell can never expand it as a filename pattern
VERSION=$(grep -m1 '"version"' package.json | tr -dc '0-9.')

mkdir -p codepages bits
rm -f "$OUTFILE" "$OUTFILE.tmp"
{
	echo "/* $OUTFILE (C) 2013-present SheetJS -- http://sheetjs.com */"
	echo "/*jshint -W100 */"
	echo "var $JSVAR = {version:\"$VERSION\"};"
} > "$OUTFILE.tmp"
if [ -e dotnet.sh ]; then bash dotnet.sh; fi

# Split each row on commas directly. Splitting on whitespace collapses an empty
# URL field, which shifts the third field into $url.
# The `|| [ -n "$cp" ]` keeps a final row that lacks a trailing newline.
while IFS=, read -r cp url cptype || [ -n "$cp" ]; do
	[ -n "$cp" ] || continue
	echo "$cp $url"
	if [ ! -e "codepages/$cp.TBL" ]; then
		if [ -z "$url" ]; then
			echo "make.sh: no URL and no cached table for codepage $cp; skipping" >&2
			continue
		fi
		# strip comments, keep two-field rows; go through a scratch file so a
		# failed transfer never leaves an empty table behind to be reused
		curl -f "$url" | sed 's/#.*//g' | awk 'NF==2' > "codepages/$cp.TBL.tmp"
		if [ ! -s "codepages/$cp.TBL.tmp" ]; then
			rm -f "codepages/$cp.TBL.tmp"
			echo "make.sh: could not fetch a table for codepage $cp from $url; skipping" >&2
			continue
		fi
		mv "codepages/$cp.TBL.tmp" "codepages/$cp.TBL"
	fi
	echo "if(typeof $JSVAR === 'undefined') $JSVAR = {};" > "bits/$cp.js.tmp"
	node make.njs "$cp" "$JSVAR" | tee -a "bits/$cp.js.tmp" >> "$OUTFILE.tmp"
	# NOTE(review): in basic regular expressions `+` is a literal plus sign, so
	# this pattern presumably never matches; left untouched to keep output stable
	sed 's/"\([0-9]+\)":/\1:/g' < "bits/$cp.js.tmp" > "bits/$cp.js"
	rm -f "bits/$cp.js.tmp"
done < "$INFILE"

{
	echo "// eslint-disable-next-line no-undef"
	echo "if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = $JSVAR;"
} >> "$OUTFILE.tmp"
sed 's/"\([0-9]+\)":/\1:/g' < "$OUTFILE.tmp" > "$OUTFILE"
rm -f "$OUTFILE.tmp"
|
||||
```
|
||||
|
||||
## Utilities
|
||||
|
||||
The encode and decode functions are kept in a separate script (`cputils.js`).
|
||||
@ -446,412 +183,8 @@ Both encode and decode deal with data represented as:
|
||||
The `ofmt` variable controls `encode` output (`str`, `arr` respectively)
|
||||
while the input format is automatically determined.
|
||||
|
||||
# Tests
|
||||
|
||||
```>test.js
|
||||
var fs = require('fs'), assert = require('assert'), vm = require('vm');
|
||||
var cptable, sbcs;
|
||||
|
||||
```
|
||||
|
||||
Due to a bug in `Buffer.from` in node `4.0 - 4.4`, a special check is needed:
|
||||
|
||||
```>test.js
|
||||
/* Buffer_from(buf, enc): builds a Buffer through Buffer.from when that call
 * works, and through the Buffer constructor otherwise. It stays a no-op
 * function when no global Buffer exists. */
var Buffer_from = function(){};

if(typeof Buffer !== 'undefined') {
	/* nbfs ends up true when Buffer.from is missing or throws on a string input */
	var nbfs = true;
	if(Buffer.from) {
		try {
			Buffer.from("foo", "utf8");
			nbfs = false;
		} catch(e) { /* a throwing Buffer.from is treated the same as a missing one */ }
	}
	if(nbfs) {
		Buffer_from = function(buf, enc) {
			if(enc) return new Buffer(buf, enc);
			return new Buffer(buf);
		};
	} else {
		Buffer_from = Buffer.from.bind(Buffer);
	}
}
|
||||
|
||||
```
|
||||
|
||||
The tests include JS validity tests (requiring or evaluating code):
|
||||
|
||||
```>test.js
|
||||
/* validity checks: every shipped script must load (or evaluate) without throwing */
describe('source', function() {
	it('should load node', function() {
		cptable = require('./');
	});
	it('should load sbcs', function() {
		sbcs = require('./sbcs');
	});
	/* NOTE(review): `excel` has no visible declaration next to `cptable` and `sbcs` -- confirm */
	it('should load excel', function() {
		excel = require('./cpexcel');
	});
	it('should process bits', function() {
		/* evaluate every *.js file found in bits/ in the current global context */
		fs.readdirSync('bits').forEach(function(name) {
			if(name.substr(-3) != ".js") return;
			vm.runInThisContext(fs.readFileSync('./bits/' + name));
		});
	});
});
|
||||
```
|
||||
|
||||
The README tests verify the snippets in the README:
|
||||
|
||||
```>test.js
|
||||
/* README tests: run the README snippets with and without the utils cache.
 * The non-ASCII identifier and string literals are intentional; they must
 * survive any re-encoding of this file. */
describe('README', function() {
	var readme = function() {
		var i = 0;

		/* SBCS table lookups in both directions (711 === 0x02C7) */
		var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ
		assert.equal(unicode_cp10000_255, "ˇ");

		var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255
		assert.equal(cp10000_711, 255);

		/* DBCS round trip through codepage 936 */
		var b1 = [0xbb,0xe3,0xd7,0xdc];
		var 汇总 = cptable.utils.decode(936, b1);
		var buf = cptable.utils.encode(936, 汇总);
		assert.equal(汇总,"汇总");
		assert.equal(buf.length, 4);
		for(i = 0; i != 4; ++i) assert.equal(b1[i], buf[i]);

		/* UTF-8 (65001) round trip of a character outside the BMP */
		var b2 = [0xf0,0x9f,0x8d,0xa3];
		var sushi= cptable.utils.decode(65001, b2);
		var sbuf = cptable.utils.encode(65001, sushi);
		assert.equal(sushi,"🍣");
		assert.equal(sbuf.length, 4);
		for(i = 0; i != 4; ++i) assert.equal(b2[i], sbuf[i]);
	};
	it('should be correct', function() {
		cptable.utils.cache.encache();
		readme();
		cptable.utils.cache.decache();
		readme();
	});
});
|
||||
```
|
||||
|
||||
The consistency tests make sure that encoding and decoding are pseudo inverses:
|
||||
|
||||
```>test.js
|
||||
/* Consistency tests: for every numeric codepage key, check that the `enc` and
 * `dec` tables invert each other, once with the utils cache enabled and once
 * with it disabled. */
describe('consistency', function() {
	cptable = require('./');
	/* declared locally so the suite does not create an implicit global */
	var U = cptable.utils;
	var chk = function(cptable, cacheit) { return function(x) {
		it('should consistently process CP ' + x, function() {
			var cp = cptable[x], D = cp.dec, E = cp.enc;
			if(cacheit) cptable.utils.cache.encache();
			else cptable.utils.cache.decache();
			/* dec -> enc. Loose comparison is deliberate: `d` is a string key
			 * while the `enc` values are numbers. */
			Object.keys(D).forEach(function(d) {
				if(E[D[d]] != d) {
					/* tolerated: the character encodes to a different code */
					if(typeof E[D[d]] !== "undefined") return;
					/* tolerated: U+FFFD placeholder entries have no encoding */
					if(D[d].charCodeAt(0) == 0xFFFD) return;
					if(D[E[D[d]]] === D[d]) return;
					throw new Error(x + " e.d[" + d + "] = " + E[D[d]] + "; d[" + d + "]=" + D[d] + "; d.e.d[" + d + "] = " + D[E[D[d]]]);
				}
			});
			/* enc -> dec must be exact */
			Object.keys(E).forEach(function(e) {
				if(D[E[e]] != e) {
					throw new Error(x + " d.e[" + e + "] = " + D[E[e]] + "; e[" + e + "]=" + E[e] + "; e.d.e[" + e + "] = " + E[D[E[e]]]);
				}
			});
			/* round trip a small corpus through the utils */
			var corpus = ["foobar"];
			corpus.forEach(function(w){
				assert.equal(U.decode(x,U.encode(x,w)),w);
			});
			cptable.utils.cache.encache();
		});
	}; };
	/* keep only keys that are numeric strings (skips e.g. `utils`) */
	describe('cached', function() {
		Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, true));
	});
	describe('direct', function() {
		Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, false));
	});
});
|
||||
```
|
||||
|
||||
The next tests look at possible entry conditions:
|
||||
|
||||
```
|
||||
/* Entry-condition tests: loading cputils with and without a codepage table,
 * and feeding encode/decode every supported input representation. */
describe('entry conditions', function() {
	it('should fail to load utils if cptable unavailable', function() {
		var sandbox = {};
		var ctx = vm.createContext(sandbox);
		assert.throws(function() {
			vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
		});
	});
	it('should load utils if cptable is available', function() {
		var sandbox = {};
		var ctx = vm.createContext(sandbox);
		vm.runInContext(fs.readFileSync('cpexcel.js','utf8'),ctx);
		vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
	});
	/* chken: encoding a string, its character array and (when available) a
	 * Buffer must agree, for every output format, cached and uncached */
	var chken = function(cp, i) {
		var c = function(cp, i, e) {
			var str = cptable.utils.encode(cp,i,e);
			var arr = cptable.utils.encode(cp,i.split(""),e);
			assert.deepEqual(str,arr);
			if(typeof Buffer === 'undefined') return;
			var buf = cptable.utils.encode(cp,Buffer_from(i),e);
			assert.deepEqual(str,buf);
		};
		cptable.utils.cache.encache();
		c(cp,i);
		c(cp,i,'buf');
		c(cp,i,'arr');
		c(cp,i,'str');
		cptable.utils.cache.decache();
		c(cp,i);
		c(cp,i,'buf');
		c(cp,i,'arr');
		c(cp,i,'str');
	};
	/* the inputs below intentionally contain non-ASCII characters */
	describe('encode', function() {
		it('CP  1252 : sbcs', function() { chken(1252,"foobær"); });
		it('CP   708 : sbcs', function() { chken(708,"ت and ث smiley faces");});
		it('CP   936 : dbcs', function() { chken(936, "这是中文字符测试");});
	});
	/* chkde: decoding a byte array / Buffer and the equivalent binary string
	 * must agree, cached and uncached */
	var chkde = function(cp, i) {
		var c = function(cp, i) {
			var s;
			/* build the per-byte character form of the input */
			if(typeof Buffer !== 'undefined' && i instanceof Buffer) s = [].map.call(i, function(s){return String.fromCharCode(s); });
			else s=(i.map) ? i.map(function(s){return String.fromCharCode(s); }) : i;
			var str = cptable.utils.decode(cp,i);
			var arr = cptable.utils.decode(cp,s.join?s.join(""):s);
			assert.deepEqual(str,arr);
			if(typeof Buffer === 'undefined') return;
			var buf = cptable.utils.decode(cp,Buffer_from(i));
			assert.deepEqual(str,buf);
		};
		cptable.utils.cache.encache();
		c(cp,i);
		cptable.utils.cache.decache();
		c(cp,i);
	};
	describe('decode', function() {
		it('CP  1252 : sbcs', function() { chkde(1252,[0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72]); }); /* "foobar" */
		if(typeof Buffer !== 'undefined') it('CP   708 : sbcs', function() { chkde(708, Buffer_from([0xca, 0x20, 0x61, 0x6e, 0x64, 0x20, 0xcb, 0x20, 0x73, 0x6d, 0x69, 0x6c, 0x65, 0x79, 0x20, 0x66, 0x61, 0x63, 0x65, 0x73])); }); /* ("ت and ث smiley faces") */
		it('CP   936 : dbcs', function() { chkde(936, [0xd5, 0xe2, 0xca, 0xc7, 0xd6, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xb7, 0xfb, 0xb2, 0xe2, 0xca, 0xd4]);}); /* "这是中文字符测试" */
	});
});
|
||||
```
|
||||
|
||||
The `testfile` helper function reads a file and compares to node's read facilities:
|
||||
|
||||
```>test.js
|
||||
/* testfile: read file `f` both as raw bytes and as text in node encoding
 * `type`, then check that codepage `cp` decodes the bytes to that text and
 * encodes the text back to those bytes. With `skip` set, only the cached
 * string round trip is performed. */
function testfile(f,cp,type,skip) {
	var raw = fs.readFileSync(f);
	var text = fs.readFileSync(f, type);
	var chars = text.split("");
	/* throw unless `out` matches the raw file contents byte for byte */
	var same = function(out) {
		if(out.length != raw.length) throw new Error(f + " " + JSON.stringify(out) + " != " + JSON.stringify(raw) + " : " + out.length + " " + raw.length);
		for(var k = 0; k != raw.length; ++k) {
			if(raw[k] !== out[k]) throw new Error("" + k + " " + raw[k] + "!=" + out[k]);
		}
	};
	var roundtrip = function(cp) {
		var decoded = cptable.utils.decode(cp, raw);
		assert.equal(text,decoded);
		same(cptable.utils.encode(cp, text));
		if(skip) return;
		same(cptable.utils.encode(cp, chars));
		/* other output formats are only exercised, not compared */
		if(f.indexOf("cptable.js") == -1) {
			cptable.utils.encode(cp, raw, 'str');
			cptable.utils.encode(cp, raw, 'arr');
		}
	};
	cptable.utils.cache.encache();
	roundtrip(cp);
	if(!skip) {
		cptable.utils.cache.decache();
		roundtrip(cp);
		cptable.utils.cache.encache();
	}
}
|
||||
```
|
||||
|
||||
The `node natives` tests check the `utf8`, `utf16le` and `ascii` codepages against node's own handling, using copies of the actual sources:
|
||||
|
||||
```>test.js
|
||||
/* Compare codepage handling against encodings node reads natively.
 * Each entry is [codepage, node encoding name, flag]; entries with a falsy
 * flag only get the ASCII-only files. */
describe('node natives', function() {
	var natives = [[65001, 'utf8',1], [1200, 'utf16le',1], [20127, 'ascii',0]];
	var unicodefiles = ['codepage.md','README.md','cptable.js'];
	var asciifiles = ['cputils.js'];
	natives.forEach(function(entry) {
		var cp = entry[0], enc = entry[1], unicode = entry[2];
		describe(enc, function() {
			/* register one test for the copy of `f` stored under misc/ */
			var add = function(f) {
				it('should process ' + f, function() { testfile('./misc/'+f+'.'+enc,cp,enc); });
			};
			cptable = require('./');
			asciifiles.forEach(function(f) { add(f); });
			if(unicode) {
				unicodefiles.forEach(function(f) { add(f); });
				if(enc === 'utf8') {
					it('should process bits', function() {
						var scripts = fs.readdirSync('bits').filter(function(x){return x.substr(-3)==".js";});
						scripts.forEach(function(f) { testfile('./bits/' + f,cp,enc,true); });
					});
				}
			}
		});
	});
});
|
||||
```
|
||||
|
||||
The `utf*` and `ascii` tests attempt to test other magic formats:
|
||||
|
||||
```>test.js
|
||||
var m = cptable.utils.magic;
|
||||
/* cmp: assert that two indexable sequences have the same length and the same
 * elements. The index and length are folded into the compared strings so a
 * failure message shows where the mismatch occurred. */
function cmp(x,z) {
	assert.equal(x.length, z.length);
	var idx = 0;
	while(idx != z.length) {
		var left = idx + "/" + x.length + "" + x[idx];
		var right = idx + "/" + z.length + "" + z[idx];
		assert.equal(left, right);
		++idx;
	}
}
|
||||
/* For every magic codepage except 16969, compare the encoded copies under
 * misc/ with the UTF-8 reference copy. A test is registered without a body
 * (pending) when the encoded copy does not exist. */
Object.keys(m).forEach(function(t) {
	if(t == 16969) return;
	describe(m[t], function() {
		/* load the UTF-8 reference text for `name`, reduced to ASCII when needed */
		var reference = function(name) {
			var text = fs.readFileSync('./misc/' + name + '.utf8', "utf8");
			if(m[t] === "ascii") text = text.replace(/[\u0080-\uffff]*/g,"");
			return text;
		};

		var codepage_test = null;
		if(fs.existsSync('./misc/codepage.md.' + m[t])) codepage_test = function() {
			var b = reference('codepage.md');
			var x = fs.readFileSync('./misc/codepage.md.' + m[t]);
			var y, z;
			/* decode, compare with the reference, then encode back */
			var roundtrip = function() {
				y = cptable.utils.decode(t, x);
				assert.equal(y,b);
				z = cptable.utils.encode(t, y);
				if(t == 65000) { assert.equal(y, cptable.utils.decode(t, z)); }
				else cmp(x,z);
			};
			/* the alternate output formats are only exercised */
			var formats = function() {
				cptable.utils.encode(t, y, 'str');
				cptable.utils.encode(t, y, 'arr');
			};
			cptable.utils.cache.encache();
			roundtrip();
			cptable.utils.cache.decache();
			roundtrip();
			cptable.utils.cache.encache();
			formats();
			cptable.utils.cache.decache();
			formats();
			cptable.utils.cache.encache();
		};
		it("should process codepage.md." + m[t], codepage_test);

		var readme_test = null;
		if(fs.existsSync('./misc/README.md.' + m[t])) readme_test = function() {
			var b = reference('README.md');
			/* this variant feeds a plain array instead of a Buffer */
			var x = [].slice.call(fs.readFileSync('./misc/README.md.' + m[t]));
			var check = function() {
				var y = cptable.utils.decode(t, x);
				assert.equal(y,b);
			};
			cptable.utils.cache.encache();
			check();
			cptable.utils.cache.decache();
			check();
			cptable.utils.cache.encache();
		};
		it("should process README.md." + m[t], readme_test);
	});
});
|
||||
```
|
||||
|
||||
The codepage `6969` is not defined, so operations should fail:
|
||||
|
||||
```>test.js
|
||||
/* Failure tests: operations on undefined or unusable codepages must throw. */
describe('failures', function() {
	/* both utils entry points must throw for codepage `cp` */
	var rejects = function(cp) {
		assert.throws(function(){return cptable.utils.encode(cp, "foobar"); });
		assert.throws(function(){return cptable.utils.decode(cp, [0x20]); });
	};
	it('should fail to find CP 6969', function() {
		/* property access on the missing table entry throws */
		assert.throws(function(){cptable[6969].dec});
		assert.throws(function(){cptable[6969].enc});
	});
	it('should fail using utils', function() {
		assert(!cptable.utils.hascp(6969));
		rejects(6969);
	});
	it('should fail with black magic', function() {
		assert(cptable.utils.hascp(16969));
		rejects(16969);
	});
	it('should fail when presented with invalid char codes', function() {
		assert.throws(function(){
			cptable.utils.cache.decache();
			return cptable.utils.encode(20127, [String.fromCharCode(0xAA)]);
		});
	});
	it('should fail to propagate UTF8 BOM in UTF7', function() {
		var inputs = ["+/v8-abc", "+/v9"];
		inputs.forEach(function(str) {
			/* the round trip is expected not to reproduce the input */
			assert.throws(function() {
				assert.equal(str, cptable.utils.encode(65000, cptable.utils.decode(65000, str)));
			});
		});
	});
});
|
||||
```
|
||||
|
||||
# Nitty Gritty
|
||||
|
||||
```json>package.json
|
||||
{
|
||||
"name": "codepage",
|
||||
"version": "1.14.0",
|
||||
"author": "SheetJS",
|
||||
"description": "pure-JS library to handle codepages",
|
||||
"keywords": [ "codepage", "iconv", "convert", "strings" ],
|
||||
"bin": {
|
||||
"codepage": "./bin/codepage.njs"
|
||||
},
|
||||
"main": "cputils.js",
|
||||
"types": "types",
|
||||
"browser": {
|
||||
"buffer": "false"
|
||||
},
|
||||
"dependencies": {
|
||||
"commander": "~2.14.1",
|
||||
"exit-on-epipe": "~1.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"voc": "~1.1.0",
|
||||
"mocha": "~2.5.3",
|
||||
"blanket": "~1.2.3",
|
||||
"@sheetjs/uglify-js": "~2.7.3",
|
||||
"@types/node": "^8.0.7",
|
||||
"@types/commander": "^2.12.0",
|
||||
"dtslint": "^0.1.2",
|
||||
"typescript": "2.2.0"
|
||||
},
|
||||
"repository": { "type":"git", "url":"git://github.com/SheetJS/js-codepage.git"},
|
||||
"scripts": {
|
||||
"pretest": "git submodule init && git submodule update",
|
||||
"test": "make test",
|
||||
"build": "make js",
|
||||
"lint": "make fullint",
|
||||
"dtslint": "dtslint types"
|
||||
},
|
||||
"config": {
|
||||
"blanket": {
|
||||
"pattern": "[cputils.js]"
|
||||
}
|
||||
},
|
||||
"alex": {
|
||||
"allow": [
|
||||
"chinese",
|
||||
"european",
|
||||
"german",
|
||||
"japanese",
|
||||
"latin"
|
||||
]
|
||||
},
|
||||
"homepage": "http://sheetjs.com/opensource",
|
||||
"files": [
|
||||
"LICENSE",
|
||||
"README.md",
|
||||
"bin",
|
||||
"bits/*.js",
|
||||
"types/index.d.ts",
|
||||
"types/*.json",
|
||||
"cptable.js",
|
||||
"cputils.js",
|
||||
"dist/sbcs.full.js",
|
||||
"dist/cpexcel.full.js"
|
||||
],
|
||||
"bugs": { "url": "https://github.com/SheetJS/js-codepage/issues" },
|
||||
"license": "Apache-2.0",
|
||||
"engines": { "node": ">=0.8" }
|
||||
}
|
||||
```
|
||||
|
||||
```>.vocrc
|
||||
{ "post": "make js" }
|
||||
```
|
||||
|
||||
```>.gitignore
|
||||
node_modules
|
||||
package-lock.json
|
||||
*.tgz
|
||||
.gitignore
|
||||
codepages/
|
||||
.vocrc
|
||||
make.sh
|
||||
make.njs
|
||||
misc/coverage.html
|
||||
codepage_mini.md
|
||||
ctest/sauce*
|
||||
```
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,78 +2,14 @@
|
||||
|
||||
The fields of the +AGA-pages.csv+AGA manifest are +AGA-codepage,url,bytes+AGA (SBCS+AD0-1, DBCS+AD0-2)
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
863,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT,1
|
||||
864,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT,1
|
||||
865,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT,1
|
||||
866,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT,1
|
||||
869,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT,1
|
||||
874,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT,1
|
||||
875,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT,1
|
||||
932,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT,2
|
||||
936,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT,2
|
||||
949,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT,2
|
||||
950,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT,2
|
||||
1026,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT,1
|
||||
1250,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT,1
|
||||
1251,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT,1
|
||||
1252,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT,1
|
||||
1253,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT,1
|
||||
1254,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT,1
|
||||
1255,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT,1
|
||||
1256,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT,1
|
||||
1257,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT,1
|
||||
1258,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT,1
|
||||
47451,http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT,1
|
||||
+AGAAYABg
|
||||
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private +AGA-0xF8FF+AGA code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the behavior, in which case the files are under APPLE and not
|
||||
MICSFT. Codepages are an absolute pain :/
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
10000,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT,1
|
||||
10006,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT,1
|
||||
10007,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT,1
|
||||
10029,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT,1
|
||||
10079,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT,1
|
||||
10081,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT,1
|
||||
+AGAAYABg
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081
|
||||
|
||||
The numbering scheme for the +AGA-ISO-8859-X+AGA series is +AGA-28590 +- X+AGA:
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
28591,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT,1
|
||||
28592,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT,1
|
||||
28593,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT,1
|
||||
28594,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT,1
|
||||
28595,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT,1
|
||||
28596,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT,1
|
||||
28597,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT,1
|
||||
28598,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT,1
|
||||
28599,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT,1
|
||||
28600,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT,1
|
||||
28601,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT,1
|
||||
28603,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT,1
|
||||
28604,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT,1
|
||||
28605,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT,1
|
||||
28606,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT,1
|
||||
+AGAAYABg
|
||||
|
||||
+ACMAIw Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
@ -142,11 +78,6 @@ The following codepages are available in .NET on Windows:
|
||||
- 21866 Ukrainian (KOI8-U)+ADs Cyrillic (KOI8-U)
|
||||
- 29001 Europa 3
|
||||
- 38598 ISO 8859-8 Hebrew+ADs Hebrew (ISO-Logical)
|
||||
- 50220 ISO 2022 Japanese with no halfwidth Katakana+ADs Japanese (JIS)
|
||||
- 50221 ISO 2022 Japanese with halfwidth Katakana+ADs Japanese (JIS Allow 1 byte Kana)
|
||||
- 50222 ISO 2022 Japanese JIS X 0201-1989+ADs Japanese (JIS Allow 1 byte Kana - SO/SI)
|
||||
- 50225 ISO 2022 Korean
|
||||
- 50227 ISO 2022 Simplified Chinese+ADs Chinese Simplified (ISO 2022)
|
||||
- 51932 EUC Japanese
|
||||
- 51936 EUC Simplified Chinese+ADs Chinese Simplified (EUC)
|
||||
- 51949 EUC Korean
|
||||
@ -163,107 +94,11 @@ The following codepages are available in .NET on Windows:
|
||||
- 57010 ISCII Gujarati
|
||||
- 57011 ISCII Punjabi
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
708,,1
|
||||
720,,1
|
||||
808,,1
|
||||
858,,1
|
||||
870,,1
|
||||
872,,1
|
||||
1010,,1
|
||||
1047,,1
|
||||
1132,,1
|
||||
1140,,1
|
||||
1141,,1
|
||||
1142,,1
|
||||
1143,,1
|
||||
1144,,1
|
||||
1145,,1
|
||||
1146,,1
|
||||
1147,,1
|
||||
1148,,1
|
||||
1149,,1
|
||||
1361,,2
|
||||
10001,,2
|
||||
10002,,2
|
||||
10003,,2
|
||||
10004,,1
|
||||
10005,,1
|
||||
10008,,2
|
||||
10010,,1
|
||||
10017,,1
|
||||
10021,,1
|
||||
10082,,1
|
||||
20000,,2
|
||||
20001,,2
|
||||
20002,,2
|
||||
20003,,2
|
||||
20004,,2
|
||||
20005,,2
|
||||
20105,,1
|
||||
20106,,1
|
||||
20107,,1
|
||||
20108,,1
|
||||
20261,,2
|
||||
20269,,1
|
||||
20273,,1
|
||||
20277,,1
|
||||
20278,,1
|
||||
20280,,1
|
||||
20284,,1
|
||||
20285,,1
|
||||
20290,,1
|
||||
20297,,1
|
||||
20420,,1
|
||||
20423,,1
|
||||
20424,,1
|
||||
20833,,1
|
||||
20838,,1
|
||||
20866,,1
|
||||
20871,,1
|
||||
20880,,1
|
||||
20905,,1
|
||||
20924,,1
|
||||
20932,,2
|
||||
20936,,2
|
||||
20949,,2
|
||||
21025,,1
|
||||
21027,,1
|
||||
21866,,1
|
||||
29001,,1
|
||||
38598,,1
|
||||
50220,,2
|
||||
50221,,2
|
||||
50222,,2
|
||||
50225,,2
|
||||
50227,,2
|
||||
51932,,2
|
||||
51936,,2
|
||||
51949,,2
|
||||
52936,,2
|
||||
54936,,2
|
||||
57002,,2
|
||||
57003,,2
|
||||
57004,,2
|
||||
57005,,2
|
||||
57006,,2
|
||||
57007,,2
|
||||
57008,,2
|
||||
57009,,2
|
||||
57010,,2
|
||||
57011,,2
|
||||
+AGAAYABg
|
||||
|
||||
The following codepages are dependencies for Visual FoxPro:
|
||||
|
||||
- 620 Mazovia (Polish) MS-DOS
|
||||
- 895 Kamenick+AP0 (Czech) MS-DOS
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
620,,1
|
||||
895,,1
|
||||
+AGAAYABg
|
||||
|
||||
+ACMAIw Building Notes
|
||||
|
||||
The script +AGA-make.sh+AGA (described later) will get these files and massage the data
|
||||
@ -289,13 +124,7 @@ which implies that code +AGA-0xF6+AGA is +AGA-String.fromCharCode(0x02C6)+AGA an
|
||||
|
||||
To build the sources on windows, consult +AGA-dotnet/MakeEncoding.cs+AGA.
|
||||
|
||||
After saving the standard output to +AGA-out+AGA, a simple script processes the result:
|
||||
|
||||
+AGAAYABgAD4-dotnet.sh
|
||||
+ACMAIQ-/bin/bash
|
||||
if +AFs +ACE -e dotnet/out +AF0AOw then exit+ADs fi
|
||||
+ADw-dotnet/out tr -s ' ' '+AFw-t' +AHw awk 'NF+AD4-2 +AHs-if(outfile) close(outfile)+ADs outfile+AD0AIg-codepages/+ACI +ACQ-1 +ACI.TBL+ACIAfQ NF+AD0APQ-2 +AHs-print +AD4 outfile+AH0'
|
||||
+AGAAYABg
|
||||
After saving standard output to +AGA-out+AGA, the +AGA-dotnet.sh+AGA script processes results.
|
||||
|
||||
+ACM Building the script
|
||||
|
||||
@ -304,43 +133,9 @@ generates JS code for encoding and decoding:
|
||||
|
||||
+ACMAIw Raw Codepages
|
||||
|
||||
+AGAAYABgAD4-make.njs
|
||||
+ACMAIQ-/usr/bin/env node
|
||||
var argv +AD0 process.argv.slice(1), fs +AD0 require('fs')+ADs
|
||||
if(argv.length +ADw 2) +AHs
|
||||
console.error(+ACI-usage: make.njs +ADw-codepage+AF8-index+AD4 +AFs-variable+AF0AIg)+ADs
|
||||
process.exit(22)+ADs /+ACo EINVAL +ACo-/
|
||||
+AH0
|
||||
|
||||
var cp/+ACo:string+ACo-/ +AD0 argv+AFs-1+AF0AOw
|
||||
var jsvar/+ACo:string+ACo-/ +AD0 argv+AFs-2+AF0 +AHwAfA +ACI-cptable+ACIAOw
|
||||
var x/+ACo:string+ACo-/ +AD0 fs.readFileSync(+ACI-codepages/+ACI +- cp +- +ACI.TBL+ACI,+ACI-utf8+ACI)+ADs
|
||||
var maxcp +AD0 0, i +AD0 0, ii +AD0 0+ADs
|
||||
|
||||
var y/+ACo:Array+ADw-Array+ADw-number+AD4 +AD4AKg-/ +AD0 x.split(+ACIAXA-n+ACI).map(function(z/+ACo:string+ACo-/)/+ACo:Array+ADw-number+AD4AKg-/ +AHs
|
||||
var w/+ACo:Array+ADw-string+AD4AKg-/ +AD0 z.split(+ACIAXA-t+ACI)+ADs
|
||||
if(w.length +ADw 2) return +AFs-Number(w+AFs-0+AF0)+AF0AOw
|
||||
return +AFs-Number(w+AFs-0+AF0), Number(w+AFs-1+AF0)+AF0AOw
|
||||
+AH0).filter(function(z) +AHs return z.length +AD4 1+ADs +AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The DBCS and SBCS code generation strategies are different. The maximum code is
|
||||
used to distinguish (max +AGA-0xFF+AGA for SBCS).
|
||||
|
||||
+AGAAYABg
|
||||
for(i +AD0 0+ADs i +ACEAPQ y.length+ADs +-+-i) if(y+AFs-i+AF0AWw-0+AF0 +AD4 maxcp) maxcp +AD0 y+AFs-i+AF0AWw-0+AF0AOw
|
||||
|
||||
var enc/+ACo:+AHsAWw-key:string+AF0:number+AH0AKg-/ +AD0 +AHsAfQ, dec/+ACo:+AHsAWw-key:string+AF0:string+AH0AfA-Array+ADw-string+AD4AKg-/ +AD0 (maxcp +ADw 256 ? +AFsAXQ : +AHsAfQ)+ADs
|
||||
for(i +AD0 0+ADs i +ACEAPQ y.length+ADs +-+-i) +AHs
|
||||
/+ACo:: if(Array.isArray(dec)) +ACo-/ dec+AFs-y+AFs-i+AF0AWw-0+AF0AXQ +AD0 String.fromCharCode(y+AFs-i+AF0AWw-1+AF0)+ADs
|
||||
enc+AFs-String.fromCharCode(y+AFs-i+AF0AWw-1+AF0)+AF0 +AD0 y+AFs-i+AF0AWw-0+AF0AOw
|
||||
+AH0
|
||||
|
||||
var odec +AD0 +ACIAIg, outstr +AD0 +ACIAIgA7
|
||||
if(maxcp +ADw 256) +AHs
|
||||
/+ACo:: if(Array.isArray(dec)) +AHs +ACo-/
|
||||
+AGAAYABg
|
||||
|
||||
The Unicode character +AGA-0xFFFD+AGA (REPLACEMENT CHARACTER) is used as a placeholder
|
||||
for characters that are not specified in the map (for example, +AGA-0xF0+AGA is not in
|
||||
code page 10000).
|
||||
@ -348,14 +143,6 @@ code page 10000).
|
||||
For SBCS, the idea is to embed a raw string with the contents of the 256 codes.
|
||||
The +AGA-dec+AGA field is merely a split of the string, and +AGA-enc+AGA is an eversion:
|
||||
|
||||
+AGAAYABg
|
||||
for(i +AD0 0+ADs i +ACEAPQ 256+ADs +-+-i) if(typeof dec+AFs-i+AF0 +AD0APQA9 +ACI-undefined+ACI) dec+AFs-i+AF0 +AD0 String.fromCharCode(0xFFFD)+ADs
|
||||
odec +AD0 JSON.stringify(dec.join(+ACIAIg))+ADs
|
||||
outstr +AD0 '(function()+AHs var d +AD0 ' +- odec +- ', D +AD0 +AFsAXQ, e +AD0 +AHsAfQA7 for(var i+AD0-0+ADs-i+ACEAPQ-d.length+ADsAKwAr-i) +AHs if(d.charCodeAt(i) +ACEAPQA9 0xFFFD) e+AFs-d.charAt(i)+AF0 +AD0 i+ADs D+AFs-i+AF0 +AD0 d.charAt(i)+ADs +AH0 return +AHsAIg-enc+ACI: e, +ACI-dec+ACI: D +AH0AOw +AH0)()+ADs'+ADs
|
||||
/+ACo:: +AH0 +ACo-/
|
||||
+AH0 else +AHs
|
||||
+AGAAYABg
|
||||
|
||||
DBCS is similar, except that the space is sliced in chunks of 256 bytes (strings
|
||||
are only generated for those high-bytes represented in the codepage).
|
||||
|
||||
@ -363,27 +150,6 @@ The strategy is to construct an array-of-arrays so that +AGA-dd+AFs-high+AF0AWw-
|
||||
character associated with the code. This array is combined at runtime to yield
|
||||
the complete decoding object (and the encoding object is an eversion):
|
||||
|
||||
+AGAAYABg
|
||||
var dd +AD0 +AFsAXQA7
|
||||
/+ACo:: if(+ACE-Array.isArray(dec)) +AHs +ACo-/
|
||||
for(i in dec) if(dec.hasOwnProperty(i)) +AHs
|
||||
ii +AD0 +-i+ADs
|
||||
if(typeof dd+AFs-ii +AD4APg 8+AF0 +AD0APQA9 +ACI-undefined+ACI) dd+AFs-ii +AD4APg 8+AF0 +AD0 +AFsAXQA7
|
||||
dd+AFs-ii +AD4APg 8+AF0AWw-ii +ACU 256+AF0 +AD0 dec+AFs-i+AF0AOw
|
||||
+AH0
|
||||
/+ACo:: +AH0 +ACo-/
|
||||
outstr +AD0 '(function()+AHs var d +AD0 +AFsAXQ, e +AD0 +AHsAfQ, D +AD0 +AFsAXQ, j+ADsAXA-n'+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ 256+ADs +-+-i) if(dd+AFs-i+AF0) +AHs
|
||||
for(var j +AD0 0+ADs j +ACEAPQ 256+ADs +-+-j) if(typeof dd+AFs-i+AF0AWw-j+AF0 +AD0APQA9 +ACI-undefined+ACI) dd+AFs-i+AF0AWw-j+AF0 +AD0 String.fromCharCode(0xFFFD)+ADs
|
||||
outstr +-+AD0 'D+AFs' +- i +- '+AF0 +AD0 ' +- JSON.stringify(dd+AFs-i+AF0.join(+ACIAIg)) +- '.split(+ACIAIg)+ADsAXA-n'+ADs
|
||||
outstr +-+AD0 'for(j +AD0 0+ADs j +ACEAPQ D+AFs' +- i +- '+AF0.length+ADs +-+-j) if(D+AFs' +- i +- '+AF0AWw-j+AF0.charCodeAt(0) +ACEAPQA9 0xFFFD) +AHs e+AFs-D+AFs' +- i +- '+AF0AWw-j+AF0AXQ +AD0 ' +- (i+ACo-256) +- ' +- j+ADs d+AFs' +- (i+ACo-256) +- ' +- j+AF0 +AD0 D+AFs' +- i +- '+AF0AWw-j+AF0AOwB9AFw-n'
|
||||
+AH0
|
||||
outstr +-+AD0 'return +AHsAIg-enc+ACI: e, +ACI-dec+ACI: d +AH0AOw +AH0)()+ADs'+ADs
|
||||
+AH0
|
||||
process.stdout.write(jsvar +- +ACIAWwAi +- cp +- +ACIAXQ +AD0 +ACI +- outstr +- +ACIAXA-n+ACI)+ADs
|
||||
|
||||
+AGAAYABg
|
||||
|
||||
+AGA-make.sh+AGA generates the tables used by +AGA-make.njs+AGA. The raw Unicode TXT files
|
||||
are columnar: +AGA-code unicode +ACM-comments+AGA. For example, the last 10 lines of the
|
||||
text file +AGA-ROMAN.TXT+AGA (for CP 10000) are:
|
||||
@ -404,35 +170,6 @@ text file +AGA-ROMAN.TXT+AGA (for CP 10000) are:
|
||||
In processing the data, the comments (after the +AGAAIwBg) are stripped and undefined
|
||||
elements (like +AGA-0x7F+AGA for CP 10000) are removed.
|
||||
|
||||
+AGAAYABgAD4-make.sh
|
||||
+ACMAIQ-/bin/bash
|
||||
INFILE+AD0AJAB7-1:-pages.csv+AH0
|
||||
OUTFILE+AD0AJAB7-2:-cptable.js+AH0
|
||||
JSVAR+AD0AJAB7-3:-cptable+AH0
|
||||
VERSION+AD0AJA(cat package.json +AHw grep version +AHw tr -dc +AFs-0-9.+AF0)
|
||||
|
||||
mkdir -p codepages bits
|
||||
rm -f +ACQ-OUTFILE +ACQ-OUTFILE.tmp
|
||||
echo +ACI-/+ACo +ACQ-OUTFILE (C) 2013-present SheetJS -- http://sheetjs.com +ACo-/+ACI +AD4 +ACQ-OUTFILE.tmp
|
||||
echo +ACI-/+ACo-jshint -W100 +ACo-/+ACI +AD4APg +ACQ-OUTFILE.tmp
|
||||
echo +ACI-var +ACQ-JSVAR +AD0 +AHs-version:+AFwAIgAk-VERSION+AFwAIgB9ADsAIg +AD4APg +ACQ-OUTFILE.tmp
|
||||
if +AFs -e dotnet.sh +AF0AOw then bash dotnet.sh+ADs fi
|
||||
awk -F, '+AHs-print +ACQ-1, +ACQ-2, +ACQ-3+AH0' +ACQ-INFILE +AHw while read cp url cptype+ADs do
|
||||
echo +ACQ-cp +ACQ-url
|
||||
if +AFs +ACE -e codepages/+ACQ-cp.TBL +AF0AOw then
|
||||
curl +ACQ-url +AHw sed 's/+ACM.+ACo-//g' +AHw awk 'NF+AD0APQ-2' +AD4 codepages/+ACQ-cp.TBL
|
||||
fi
|
||||
echo +ACI-if(typeof +ACQ-JSVAR +AD0APQA9 'undefined') +ACQ-JSVAR +AD0 +AHsAfQA7ACI +AD4 bits/+ACQ-cp.js.tmp
|
||||
node make.njs +ACQ-cp +ACQ-JSVAR +AHw tee -a bits/+ACQ-cp.js.tmp +AD4APg +ACQ-OUTFILE.tmp
|
||||
sed 's/+ACIAXA(+AFs-0-9+AF0AKwBc)+ACI:/+AFw-1:/g' +ADw-bits/+ACQ-cp.js.tmp +AD4-bits/+ACQ-cp.js
|
||||
rm -f bits/+ACQ-cp.js.tmp
|
||||
done
|
||||
echo +ACI-// eslint-disable-next-line no-undef+ACI +AD4APg +ACQ-OUTFILE.tmp
|
||||
echo +ACI-if (typeof module +ACEAPQA9 'undefined' +ACYAJg module.exports +ACYAJg typeof DO+AF8-NOT+AF8-EXPORT+AF8-CODEPAGE +AD0APQA9 'undefined') module.exports +AD0 +ACQ-JSVAR+ADsAIg +AD4APg +ACQ-OUTFILE.tmp
|
||||
sed 's/+ACIAXA(+AFs-0-9+AF0AKwBc)+ACI:/+AFw-1:/g' +ADwAJA-OUTFILE.tmp +AD4AJA-OUTFILE
|
||||
rm -f +ACQ-OUTFILE.tmp
|
||||
+AGAAYABg
|
||||
|
||||
+ACMAIw Utilities
|
||||
|
||||
The encode and decode functions are kept in a separate script (+AGA-cputils.js+AGA).
|
||||
@ -446,412 +183,8 @@ Both encode and decode deal with data represented as:
|
||||
The +AGA-ofmt+AGA variable controls +AGA-encode+AGA output (+AGA-str+AGA, +AGA-arr+AGA respectively)
|
||||
while the input format is automatically determined.
|
||||
|
||||
+ACM Tests
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
var fs +AD0 require('fs'), assert +AD0 require('assert'), vm +AD0 require('vm')+ADs
|
||||
var cptable, sbcs+ADs
|
||||
|
||||
+AGAAYABg
|
||||
|
||||
Due to a bug in +AGA-Buffer.from+AGA in node +AGA-4.0 - 4.4+AGA, a special check is needed:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
var Buffer+AF8-from +AD0 function()+AHsAfQA7
|
||||
|
||||
if(typeof Buffer +ACEAPQA9 'undefined') +AHs
|
||||
var nbfs +AD0 +ACE-Buffer.from+ADs
|
||||
if(+ACE-nbfs) try +AHs Buffer.from(+ACI-foo+ACI, +ACI-utf8+ACI)+ADs +AH0 catch(e) +AHs nbfs +AD0 true+ADs +AH0
|
||||
Buffer+AF8-from +AD0 nbfs ? function(buf, enc) +AHs return (enc) ? new Buffer(buf, enc) : new Buffer(buf)+ADs +AH0 : Buffer.from.bind(Buffer)+ADs
|
||||
+AH0
|
||||
|
||||
+AGAAYABg
|
||||
|
||||
The tests include JS validity tests (requiring or evaluating code):
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
describe('source', function() +AHs
|
||||
it('should load node', function() +AHs cptable +AD0 require('./')+ADs +AH0)+ADs
|
||||
it('should load sbcs', function() +AHs sbcs +AD0 require('./sbcs')+ADs +AH0)+ADs
|
||||
it('should load excel', function() +AHs excel +AD0 require('./cpexcel')+ADs +AH0)+ADs
|
||||
it('should process bits', function() +AHs
|
||||
var files +AD0 fs.readdirSync('bits').filter(function(x)+AHs-return x.substr(-3)+AD0APQAi.js+ACIAOwB9)+ADs
|
||||
files.forEach(function(x) +AHs
|
||||
vm.runInThisContext(fs.readFileSync('./bits/' +- x))+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The README tests verify the snippets in the README:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
describe('README', function() +AHs
|
||||
var readme +AD0 function() +AHs
|
||||
var unicode+AF8-cp10000+AF8-255 +AD0 cptable+AFs-10000+AF0.dec+AFs-255+AF0AOw // +Asc
|
||||
assert.equal(unicode+AF8-cp10000+AF8-255, +ACICxwAi)+ADs
|
||||
|
||||
var cp10000+AF8-711 +AD0 cptable+AFs-10000+AF0.enc+AFs-String.fromCharCode(711)+AF0AOw // 255
|
||||
assert.equal(cp10000+AF8-711, 255)+ADs
|
||||
|
||||
var b1 +AD0 +AFs-0xbb,0xe3,0xd7,0xdc+AF0AOw
|
||||
var s1 +AD0 b1.map(function(x) +AHs return String.fromCharCode(x)+ADs +AH0).join(+ACIAIg)+ADs
|
||||
var +bEdgOw +AD0 cptable.utils.decode(936, b1)+ADs
|
||||
var buf +AD0 cptable.utils.encode(936, +bEdgOw)+ADs
|
||||
assert.equal(+bEdgOw,+ACJsR2A7ACI)+ADs
|
||||
assert.equal(buf.length, 4)+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ 4+ADs +-+-i) assert.equal(b1+AFs-i+AF0, buf+AFs-i+AF0)+ADs
|
||||
|
||||
var b2 +AD0 +AFs-0xf0,0x9f,0x8d,0xa3+AF0AOw
|
||||
var sushi+AD0 cptable.utils.decode(65001, b2)+ADs
|
||||
var sbuf +AD0 cptable.utils.encode(65001, sushi)+ADs
|
||||
assert.equal(sushi,+ACLYPN9jACI)+ADs
|
||||
assert.equal(sbuf.length, 4)+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ 4+ADs +-+-i) assert.equal(b2+AFs-i+AF0, sbuf+AFs-i+AF0)+ADs
|
||||
|
||||
+AH0AOw
|
||||
it('should be correct', function() +AHs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
readme()+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
readme()+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The consistency tests make sure that encoding and decoding are pseudo inverses:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
describe('consistency', function() +AHs
|
||||
cptable +AD0 require('./')+ADs
|
||||
U +AD0 cptable.utils+ADs
|
||||
var chk +AD0 function(cptable, cacheit) +AHs return function(x) +AHs
|
||||
it('should consistently process CP ' +- x, function() +AHs
|
||||
var cp +AD0 cptable+AFs-x+AF0, D +AD0 cp.dec, E +AD0 cp.enc+ADs
|
||||
if(cacheit) cptable.utils.cache.encache()+ADs
|
||||
else cptable.utils.cache.decache()+ADs
|
||||
Object.keys(D).forEach(function(d) +AHs
|
||||
if(E+AFs-D+AFs-d+AF0AXQ +ACEAPQ d) +AHs
|
||||
if(typeof E+AFs-D+AFs-d+AF0AXQ +ACEAPQA9 +ACI-undefined+ACI) return+ADs
|
||||
if(D+AFs-d+AF0.charCodeAt(0) +AD0APQ 0xFFFD) return+ADs
|
||||
if(D+AFs-E+AFs-D+AFs-d+AF0AXQBd +AD0APQA9 D+AFs-d+AF0) return+ADs
|
||||
throw new Error(x +- +ACI e.d+AFsAIg +- d +- +ACIAXQ +AD0 +ACI +- E+AFs-D+AFs-d+AF0AXQ +- +ACIAOw d+AFsAIg +- d +- +ACIAXQA9ACI +- D+AFs-d+AF0 +- +ACIAOw d.e.d+AFsAIg +- d +- +ACIAXQ +AD0 +ACI +- D+AFs-E+AFs-D+AFs-d+AF0AXQBd)+ADs
|
||||
+AH0
|
||||
+AH0)+ADs
|
||||
Object.keys(E).forEach(function(e) +AHs
|
||||
if(D+AFs-E+AFs-e+AF0AXQ +ACEAPQ e) +AHs
|
||||
throw new Error(x +- +ACI d.e+AFsAIg +- e +- +ACIAXQ +AD0 +ACI +- D+AFs-E+AFs-e+AF0AXQ +- +ACIAOw e+AFsAIg +- e +- +ACIAXQA9ACI +- E+AFs-e+AF0 +- +ACIAOw e.d.e+AFsAIg +- e +- +ACIAXQ +AD0 +ACI +- E+AFs-D+AFs-E+AFs-e+AF0AXQBd)+ADs
|
||||
+AH0
|
||||
+AH0)+ADs
|
||||
var corpus +AD0 +AFsAIg-foobar+ACIAXQA7
|
||||
corpus.forEach(function(w)+AHs
|
||||
assert.equal(U.decode(x,U.encode(x,w)),w)+ADs
|
||||
+AH0)+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
+AH0)+ADs
|
||||
+AH0AOw +AH0AOw
|
||||
describe('cached', function() +AHs
|
||||
Object.keys(cptable).filter(function(w) +AHs return w +AD0APQ +-w+ADs +AH0).forEach(chk(cptable, true))+ADs
|
||||
+AH0)+ADs
|
||||
describe('direct', function() +AHs
|
||||
Object.keys(cptable).filter(function(w) +AHs return w +AD0APQ +-w+ADs +AH0).forEach(chk(cptable, false))+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The next tests look at possible entry conditions:
|
||||
|
||||
+AGAAYABg
|
||||
describe('entry conditions', function() +AHs
|
||||
it('should fail to load utils if cptable unavailable', function() +AHs
|
||||
var sandbox +AD0 +AHsAfQA7
|
||||
var ctx +AD0 vm.createContext(sandbox)+ADs
|
||||
assert.throws(function() +AHs
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
it('should load utils if cptable is available', function() +AHs
|
||||
var sandbox +AD0 +AHsAfQA7
|
||||
var ctx +AD0 vm.createContext(sandbox)+ADs
|
||||
vm.runInContext(fs.readFileSync('cpexcel.js','utf8'),ctx)+ADs
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx)+ADs
|
||||
+AH0)+ADs
|
||||
var chken +AD0 function(cp, i) +AHs
|
||||
var c +AD0 function(cp, i, e) +AHs
|
||||
var str +AD0 cptable.utils.encode(cp,i,e)+ADs
|
||||
var arr +AD0 cptable.utils.encode(cp,i.split(+ACIAIg),e)+ADs
|
||||
assert.deepEqual(str,arr)+ADs
|
||||
if(typeof Buffer +AD0APQA9 'undefined') return+ADs
|
||||
var buf +AD0 cptable.utils.encode(cp,Buffer+AF8-from(i),e)+ADs
|
||||
assert.deepEqual(str,buf)+ADs
|
||||
+AH0AOw
|
||||
cptable.utils.cache.encache()+ADs
|
||||
c(cp,i)+ADs
|
||||
c(cp,i,'buf')+ADs
|
||||
c(cp,i,'arr')+ADs
|
||||
c(cp,i,'str')+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
c(cp,i)+ADs
|
||||
c(cp,i,'buf')+ADs
|
||||
c(cp,i,'arr')+ADs
|
||||
c(cp,i,'str')+ADs
|
||||
+AH0AOw
|
||||
describe('encode', function() +AHs
|
||||
it('CP 1252 : sbcs', function() +AHs chken(1252,+ACI-foo+ICI-b+AP4-r+ACI)+ADs +AH0)+ADs
|
||||
it('CP 708 : sbcs', function() +AHs chken(708,+ACIGKg and +Bis smiley faces+ACI)+ADsAfQ)+ADs
|
||||
it('CP 936 : dbcs', function() +AHs chken(936, +ACKP2WYvTi1lh1tXeyZtS4vVACI)+ADsAfQ)+ADs
|
||||
+AH0)+ADs
|
||||
var chkde +AD0 function(cp, i) +AHs
|
||||
var c +AD0 function(cp, i) +AHs
|
||||
var s+ADs
|
||||
if(typeof Buffer +ACEAPQA9 'undefined' +ACYAJg i instanceof Buffer) s +AD0 +AFsAXQ.map.call(i, function(s)+AHs-return String.fromCharCode(s)+ADs +AH0)+ADs
|
||||
else s+AD0(i.map) ? i.map(function(s)+AHs-return String.fromCharCode(s)+ADs +AH0) : i+ADs
|
||||
var str +AD0 cptable.utils.decode(cp,i)+ADs
|
||||
var arr +AD0 cptable.utils.decode(cp,s.join?s.join(+ACIAIg):s)+ADs
|
||||
assert.deepEqual(str,arr)+ADs
|
||||
if(typeof Buffer +AD0APQA9 'undefined') return+ADs
|
||||
var buf +AD0 cptable.utils.decode(cp,Buffer+AF8-from(i))+ADs
|
||||
assert.deepEqual(str,buf)+ADs
|
||||
+AH0AOw
|
||||
cptable.utils.cache.encache()+ADs
|
||||
c(cp,i)+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
c(cp,i)+ADs
|
||||
+AH0AOw
|
||||
describe('decode', function() +AHs
|
||||
it('CP 1252 : sbcs', function() +AHs chkde(1252,+AFs-0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72+AF0)+ADs +AH0)+ADs /+ACo +ACI-foobar+ACI +ACo-/
|
||||
if(typeof Buffer +ACEAPQA9 'undefined') it('CP 708 : sbcs', function() +AHs chkde(708, Buffer+AF8-from(+AFs-0xca, 0x20, 0x61, 0x6e, 0x64, 0x20, 0xcb, 0x20, 0x73, 0x6d, 0x69, 0x6c, 0x65, 0x79, 0x20, 0x66, 0x61, 0x63, 0x65, 0x73+AF0))+ADs +AH0)+ADs /+ACo (+ACIGKg and +Bis smiley faces+ACI) +ACo-/
|
||||
it('CP 936 : dbcs', function() +AHs chkde(936, +AFs-0xd5, 0xe2, 0xca, 0xc7, 0xd6, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xb7, 0xfb, 0xb2, 0xe2, 0xca, 0xd4+AF0)+ADsAfQ)+ADs /+ACo +ACKP2WYvTi1lh1tXeyZtS4vVACI +ACo-/
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The +AGA-testfile+AGA helper function reads a file and compares to node's read facilities:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
function testfile(f,cp,type,skip) +AHs
|
||||
var d +AD0 fs.readFileSync(f)+ADs
|
||||
var x +AD0 fs.readFileSync(f, type)+ADs
|
||||
var a +AD0 x.split(+ACIAIg)+ADs
|
||||
var chk +AD0 function(cp) +AHs
|
||||
var y +AD0 cptable.utils.decode(cp, d)+ADs
|
||||
assert.equal(x,y)+ADs
|
||||
var z +AD0 cptable.utils.encode(cp, x)+ADs
|
||||
if(z.length +ACEAPQ d.length) throw new Error(f +- +ACI +ACI +- JSON.stringify(z) +- +ACI +ACEAPQ +ACI +- JSON.stringify(d) +- +ACI : +ACI +- z.length +- +ACI +ACI +- d.length)+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ d.length+ADs +-+-i) if(d+AFs-i+AF0 +ACEAPQA9 z+AFs-i+AF0) throw new Error(+ACIAIg +- i +- +ACI +ACI +- d+AFs-i+AF0 +- +ACIAIQA9ACI +- z+AFs-i+AF0)+ADs
|
||||
if(skip) return+ADs
|
||||
z +AD0 cptable.utils.encode(cp, a)+ADs
|
||||
if(z.length +ACEAPQ d.length) throw new Error(f +- +ACI +ACI +- JSON.stringify(z) +- +ACI +ACEAPQ +ACI +- JSON.stringify(d) +- +ACI : +ACI +- z.length +- +ACI +ACI +- d.length)+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ d.length+ADs +-+-i) if(d+AFs-i+AF0 +ACEAPQA9 z+AFs-i+AF0) throw new Error(+ACIAIg +- i +- +ACI +ACI +- d+AFs-i+AF0 +- +ACIAIQA9ACI +- z+AFs-i+AF0)+ADs
|
||||
if(f.indexOf(+ACI-cptable.js+ACI) +AD0APQ -1) +AHs
|
||||
cptable.utils.encode(cp, d, 'str')+ADs
|
||||
cptable.utils.encode(cp, d, 'arr')+ADs
|
||||
+AH0
|
||||
+AH0
|
||||
cptable.utils.cache.encache()+ADs
|
||||
chk(cp)+ADs
|
||||
if(skip) return+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
chk(cp)+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
+AH0
|
||||
+AGAAYABg
|
||||
|
||||
The +AGA-utf8+AGA tests verify UTF-8 encoding of the actual JS sources:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
describe('node natives', function() +AHs
|
||||
var node +AD0 +AFsAWw-65001, 'utf8',1+AF0, +AFs-1200, 'utf16le',1+AF0, +AFs-20127, 'ascii',0+AF0AXQA7
|
||||
var unicodefiles +AD0 +AFs'codepage.md','README.md','cptable.js'+AF0AOw
|
||||
var asciifiles +AD0 +AFs'cputils.js'+AF0AOw
|
||||
node.forEach(function(w) +AHs
|
||||
describe(w+AFs-1+AF0, function() +AHs
|
||||
cptable +AD0 require('./')+ADs
|
||||
asciifiles.forEach(function(f) +AHs
|
||||
it('should process ' +- f, function() +AHs testfile('./misc/'+-f+-'.'+-w+AFs-1+AF0,w+AFs-0+AF0,w+AFs-1+AF0)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
if(+ACE-w+AFs-2+AF0) return+ADs
|
||||
unicodefiles.forEach(function(f) +AHs
|
||||
it('should process ' +- f, function() +AHs testfile('./misc/'+-f+-'.'+-w+AFs-1+AF0,w+AFs-0+AF0,w+AFs-1+AF0)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
if(w+AFs-1+AF0 +AD0APQA9 'utf8') it('should process bits', function() +AHs
|
||||
var files +AD0 fs.readdirSync('bits').filter(function(x)+AHs-return x.substr(-3)+AD0APQAi.js+ACIAOwB9)+ADs
|
||||
files.forEach(function(f) +AHs testfile('./bits/' +- f,w+AFs-0+AF0,w+AFs-1+AF0,true)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The +AGA-utf+ACoAYA and +AGA-ascii+AGA tests attempt to test other magic formats:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
var m +AD0 cptable.utils.magic+ADs
|
||||
function cmp(x,z) +AHs
|
||||
assert.equal(x.length, z.length)+ADs
|
||||
for(var i +AD0 0+ADs i +ACEAPQ z.length+ADs +-+-i) assert.equal(i+-+ACI-/+ACIAKw-x.length+-+ACIAIgAr-x+AFs-i+AF0, i+-+ACI-/+ACIAKw-z.length+-+ACIAIgAr-z+AFs-i+AF0)+ADs
|
||||
+AH0
|
||||
Object.keys(m).forEach(function(t)+AHs-if(t +ACEAPQ 16969) describe(m+AFs-t+AF0, function() +AHs
|
||||
it(+ACI-should process codepage.md.+ACI +- m+AFs-t+AF0, fs.existsSync('./misc/codepage.md.' +- m+AFs-t+AF0) ?
|
||||
function() +AHs
|
||||
var b +AD0 fs.readFileSync('./misc/codepage.md.utf8', +ACI-utf8+ACI)+ADs
|
||||
if(m+AFs-t+AF0 +AD0APQA9 +ACI-ascii+ACI) b +AD0 b.replace(/+AFsAXA-u0080-+AFw-uffff+AF0AKg-/g,+ACIAIg)+ADs
|
||||
var x +AD0 fs.readFileSync('./misc/codepage.md.' +- m+AFs-t+AF0)+ADs
|
||||
var y, z+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
y +AD0 cptable.utils.decode(t, x)+ADs
|
||||
assert.equal(y,b)+ADs
|
||||
z +AD0 cptable.utils.encode(t, y)+ADs
|
||||
if(t +ACEAPQ 65000) cmp(x,z)+ADs
|
||||
else +AHs assert.equal(y, cptable.utils.decode(t, z))+ADs +AH0
|
||||
cptable.utils.cache.decache()+ADs
|
||||
y +AD0 cptable.utils.decode(t, x)+ADs
|
||||
assert.equal(y,b)+ADs
|
||||
z +AD0 cptable.utils.encode(t, y)+ADs
|
||||
if(t +ACEAPQ 65000) cmp(x,z)+ADs
|
||||
else +AHs assert.equal(y, cptable.utils.decode(t, z))+ADs +AH0
|
||||
cptable.utils.cache.encache()+ADs
|
||||
cptable.utils.encode(t, y, 'str')+ADs
|
||||
cptable.utils.encode(t, y, 'arr')+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
cptable.utils.encode(t, y, 'str')+ADs
|
||||
cptable.utils.encode(t, y, 'arr')+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
+AH0
|
||||
: null)+ADs
|
||||
it(+ACI-should process README.md.+ACI +- m+AFs-t+AF0, fs.existsSync('./misc/README.md.' +- m+AFs-t+AF0) ?
|
||||
function() +AHs
|
||||
var b +AD0 fs.readFileSync('./misc/README.md.utf8', +ACI-utf8+ACI)+ADs
|
||||
if(m+AFs-t+AF0 +AD0APQA9 +ACI-ascii+ACI) b +AD0 b.replace(/+AFsAXA-u0080-+AFw-uffff+AF0AKg-/g,+ACIAIg)+ADs
|
||||
var x +AD0 fs.readFileSync('./misc/README.md.' +- m+AFs-t+AF0)+ADs
|
||||
x +AD0 +AFsAXQ.slice.call(x)+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
var y +AD0 cptable.utils.decode(t, x)+ADs
|
||||
assert.equal(y,b)+ADs
|
||||
cptable.utils.cache.decache()+ADs
|
||||
var y +AD0 cptable.utils.decode(t, x)+ADs
|
||||
assert.equal(y,b)+ADs
|
||||
cptable.utils.cache.encache()+ADs
|
||||
+AH0
|
||||
: null)+ADs
|
||||
+AH0)+ADsAfQ)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
The codepage +AGA-6969+AGA is not defined, so operations should fail:
|
||||
|
||||
+AGAAYABgAD4-test.js
|
||||
describe('failures', function() +AHs
|
||||
it('should fail to find CP 6969', function() +AHs
|
||||
assert.throws(function()+AHs-cptable+AFs-6969+AF0.dec+AH0)+ADs
|
||||
assert.throws(function()+AHs-cptable+AFs-6969+AF0.enc+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
it('should fail using utils', function() +AHs
|
||||
assert(+ACE-cptable.utils.hascp(6969))+ADs
|
||||
assert.throws(function()+AHs-return cptable.utils.encode(6969, +ACI-foobar+ACI)+ADs +AH0)+ADs
|
||||
assert.throws(function()+AHs-return cptable.utils.decode(6969, +AFs-0x20+AF0)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
it('should fail with black magic', function() +AHs
|
||||
assert(cptable.utils.hascp(16969))+ADs
|
||||
assert.throws(function()+AHs-return cptable.utils.encode(16969, +ACI-foobar+ACI)+ADs +AH0)+ADs
|
||||
assert.throws(function()+AHs-return cptable.utils.decode(16969, +AFs-0x20+AF0)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
it('should fail when presented with invalid char codes', function() +AHs
|
||||
assert.throws(function()+AHs-cptable.utils.cache.decache()+ADs return cptable.utils.encode(20127, +AFs-String.fromCharCode(0xAA)+AF0)+ADsAfQ)+ADs
|
||||
+AH0)+ADs
|
||||
it('should fail to propagate UTF8 BOM in UTF7', function() +AHs
|
||||
+AFsAIgAr-/v8-abc+ACI, +ACIAKw-/v9+ACIAXQ.forEach(function(m) +AHs assert.throws(function() +AHs
|
||||
assert.equal(m, cptable.utils.encode(65000, cptable.utils.decode(65000, m)))+ADs
|
||||
+AH0)+ADs +AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AH0)+ADs
|
||||
+AGAAYABg
|
||||
|
||||
+ACM Nitty Gritty
|
||||
|
||||
+AGAAYABg-json+AD4-package.json
|
||||
+AHs
|
||||
+ACI-name+ACI: +ACI-codepage+ACI,
|
||||
+ACI-version+ACI: +ACI-1.14.0+ACI,
|
||||
+ACI-author+ACI: +ACI-SheetJS+ACI,
|
||||
+ACI-description+ACI: +ACI-pure-JS library to handle codepages+ACI,
|
||||
+ACI-keywords+ACI: +AFs +ACI-codepage+ACI, +ACI-iconv+ACI, +ACI-convert+ACI, +ACI-strings+ACI +AF0,
|
||||
+ACI-bin+ACI: +AHs
|
||||
+ACI-codepage+ACI: +ACI./bin/codepage.njs+ACI
|
||||
+AH0,
|
||||
+ACI-main+ACI: +ACI-cputils.js+ACI,
|
||||
+ACI-types+ACI: +ACI-types+ACI,
|
||||
+ACI-browser+ACI: +AHs
|
||||
+ACI-buffer+ACI: +ACI-false+ACI
|
||||
+AH0,
|
||||
+ACI-dependencies+ACI: +AHs
|
||||
+ACI-commander+ACI: +ACIAfg-2.14.1+ACI,
|
||||
+ACI-exit-on-epipe+ACI: +ACIAfg-1.0.1+ACI
|
||||
+AH0,
|
||||
+ACI-devDependencies+ACI: +AHs
|
||||
+ACI-voc+ACI: +ACIAfg-1.1.0+ACI,
|
||||
+ACI-mocha+ACI: +ACIAfg-2.5.3+ACI,
|
||||
+ACI-blanket+ACI: +ACIAfg-1.2.3+ACI,
|
||||
+ACIAQA-sheetjs/uglify-js+ACI: +ACIAfg-2.7.3+ACI,
|
||||
+ACIAQA-types/node+ACI: +ACIAXg-8.0.7+ACI,
|
||||
+ACIAQA-types/commander+ACI: +ACIAXg-2.12.0+ACI,
|
||||
+ACI-dtslint+ACI: +ACIAXg-0.1.2+ACI,
|
||||
+ACI-typescript+ACI: +ACI-2.2.0+ACI
|
||||
+AH0,
|
||||
+ACI-repository+ACI: +AHs +ACI-type+ACI:+ACI-git+ACI, +ACI-url+ACI:+ACI-git://github.com/SheetJS/js-codepage.git+ACIAfQ,
|
||||
+ACI-scripts+ACI: +AHs
|
||||
+ACI-pretest+ACI: +ACI-git submodule init +ACYAJg git submodule update+ACI,
|
||||
+ACI-test+ACI: +ACI-make test+ACI,
|
||||
+ACI-build+ACI: +ACI-make js+ACI,
|
||||
+ACI-lint+ACI: +ACI-make fullint+ACI,
|
||||
+ACI-dtslint+ACI: +ACI-dtslint types+ACI
|
||||
+AH0,
|
||||
+ACI-config+ACI: +AHs
|
||||
+ACI-blanket+ACI: +AHs
|
||||
+ACI-pattern+ACI: +ACIAWw-cputils.js+AF0AIg
|
||||
+AH0
|
||||
+AH0,
|
||||
+ACI-alex+ACI: +AHs
|
||||
+ACI-allow+ACI: +AFs
|
||||
+ACI-chinese+ACI,
|
||||
+ACI-european+ACI,
|
||||
+ACI-german+ACI,
|
||||
+ACI-japanese+ACI,
|
||||
+ACI-latin+ACI
|
||||
+AF0
|
||||
+AH0,
|
||||
+ACI-homepage+ACI: +ACI-http://sheetjs.com/opensource+ACI,
|
||||
+ACI-files+ACI: +AFs
|
||||
+ACI-LICENSE+ACI,
|
||||
+ACI-README.md+ACI,
|
||||
+ACI-bin+ACI,
|
||||
+ACI-bits/+ACo.js+ACI,
|
||||
+ACI-types/index.d.ts+ACI,
|
||||
+ACI-types/+ACo.json+ACI,
|
||||
+ACI-cptable.js+ACI,
|
||||
+ACI-cputils.js+ACI,
|
||||
+ACI-dist/sbcs.full.js+ACI,
|
||||
+ACI-dist/cpexcel.full.js+ACI
|
||||
+AF0,
|
||||
+ACI-bugs+ACI: +AHs +ACI-url+ACI: +ACI-https://github.com/SheetJS/js-codepage/issues+ACI +AH0,
|
||||
+ACI-license+ACI: +ACI-Apache-2.0+ACI,
|
||||
+ACI-engines+ACI: +AHs +ACI-node+ACI: +ACIAPgA9-0.8+ACI +AH0
|
||||
+AH0
|
||||
+AGAAYABg
|
||||
|
||||
+AGAAYABgAD4.vocrc
|
||||
+AHs +ACI-post+ACI: +ACI-make js+ACI +AH0
|
||||
+AGAAYABg
|
||||
|
||||
+AGAAYABgAD4.gitignore
|
||||
node+AF8-modules
|
||||
package-lock.json
|
||||
+ACo.tgz
|
||||
.gitignore
|
||||
codepages/
|
||||
.vocrc
|
||||
make.sh
|
||||
make.njs
|
||||
misc/coverage.html
|
||||
codepage+AF8-mini.md
|
||||
ctest/sauce+ACo
|
||||
+AGAAYABg
|
||||
|
@ -2,78 +2,14 @@
|
||||
|
||||
The fields of the `pages.csv` manifest are `codepage,url,bytes` (SBCS=1, DBCS=2)
|
||||
|
||||
```>pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
863,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT,1
|
||||
864,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT,1
|
||||
865,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT,1
|
||||
866,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT,1
|
||||
869,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT,1
|
||||
874,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT,1
|
||||
875,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT,1
|
||||
932,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT,2
|
||||
936,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT,2
|
||||
949,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT,2
|
||||
950,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT,2
|
||||
1026,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT,1
|
||||
1250,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT,1
|
||||
1251,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT,1
|
||||
1252,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT,1
|
||||
1253,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT,1
|
||||
1254,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT,1
|
||||
1255,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT,1
|
||||
1256,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT,1
|
||||
1257,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT,1
|
||||
1258,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT,1
|
||||
47451,http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT,1
|
||||
```
|
||||
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private `0xF8FF` code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the Apple behavior, in which case the files are under APPLE and not
|
||||
MICSFT. Codepages are an absolute pain :/
|
||||
|
||||
```>pages.csv
|
||||
10000,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT,1
|
||||
10006,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT,1
|
||||
10007,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT,1
|
||||
10029,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT,1
|
||||
10079,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT,1
|
||||
10081,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT,1
|
||||
```
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081
|
||||
|
||||
The numbering scheme for the `ISO-8859-X` series is `28590 + X`:
|
||||
|
||||
```>pages.csv
|
||||
28591,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT,1
|
||||
28592,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT,1
|
||||
28593,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT,1
|
||||
28594,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT,1
|
||||
28595,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT,1
|
||||
28596,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT,1
|
||||
28597,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT,1
|
||||
28598,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT,1
|
||||
28599,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT,1
|
||||
28600,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT,1
|
||||
28601,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT,1
|
||||
28603,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT,1
|
||||
28604,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT,1
|
||||
28605,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT,1
|
||||
28606,http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT,1
|
||||
```
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
@ -142,11 +78,6 @@ The following codepages are available in .NET on Windows:
|
||||
- 21866 Ukrainian (KOI8-U); Cyrillic (KOI8-U)
|
||||
- 29001 Europa 3
|
||||
- 38598 ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
||||
- 50220 ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
||||
- 50221 ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS Allow 1 byte Kana)
|
||||
- 50222 ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS Allow 1 byte Kana - SO/SI)
|
||||
- 50225 ISO 2022 Korean
|
||||
- 50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
||||
- 51932 EUC Japanese
|
||||
- 51936 EUC Simplified Chinese; Chinese Simplified (EUC)
|
||||
- 51949 EUC Korean
|
||||
@ -163,107 +94,11 @@ The following codepages are available in .NET on Windows:
|
||||
- 57010 ISCII Gujarati
|
||||
- 57011 ISCII Punjabi
|
||||
|
||||
```>pages.csv
|
||||
708,,1
|
||||
720,,1
|
||||
808,,1
|
||||
858,,1
|
||||
870,,1
|
||||
872,,1
|
||||
1010,,1
|
||||
1047,,1
|
||||
1132,,1
|
||||
1140,,1
|
||||
1141,,1
|
||||
1142,,1
|
||||
1143,,1
|
||||
1144,,1
|
||||
1145,,1
|
||||
1146,,1
|
||||
1147,,1
|
||||
1148,,1
|
||||
1149,,1
|
||||
1361,,2
|
||||
10001,,2
|
||||
10002,,2
|
||||
10003,,2
|
||||
10004,,1
|
||||
10005,,1
|
||||
10008,,2
|
||||
10010,,1
|
||||
10017,,1
|
||||
10021,,1
|
||||
10082,,1
|
||||
20000,,2
|
||||
20001,,2
|
||||
20002,,2
|
||||
20003,,2
|
||||
20004,,2
|
||||
20005,,2
|
||||
20105,,1
|
||||
20106,,1
|
||||
20107,,1
|
||||
20108,,1
|
||||
20261,,2
|
||||
20269,,1
|
||||
20273,,1
|
||||
20277,,1
|
||||
20278,,1
|
||||
20280,,1
|
||||
20284,,1
|
||||
20285,,1
|
||||
20290,,1
|
||||
20297,,1
|
||||
20420,,1
|
||||
20423,,1
|
||||
20424,,1
|
||||
20833,,1
|
||||
20838,,1
|
||||
20866,,1
|
||||
20871,,1
|
||||
20880,,1
|
||||
20905,,1
|
||||
20924,,1
|
||||
20932,,2
|
||||
20936,,2
|
||||
20949,,2
|
||||
21025,,1
|
||||
21027,,1
|
||||
21866,,1
|
||||
29001,,1
|
||||
38598,,1
|
||||
50220,,2
|
||||
50221,,2
|
||||
50222,,2
|
||||
50225,,2
|
||||
50227,,2
|
||||
51932,,2
|
||||
51936,,2
|
||||
51949,,2
|
||||
52936,,2
|
||||
54936,,2
|
||||
57002,,2
|
||||
57003,,2
|
||||
57004,,2
|
||||
57005,,2
|
||||
57006,,2
|
||||
57007,,2
|
||||
57008,,2
|
||||
57009,,2
|
||||
57010,,2
|
||||
57011,,2
|
||||
```
|
||||
|
||||
The following codepages are dependencies for Visual FoxPro:
|
||||
|
||||
- 620 Mazovia (Polish) MS-DOS
|
||||
- 895 Kamenický (Czech) MS-DOS
|
||||
|
||||
```>pages.csv
|
||||
620,,1
|
||||
895,,1
|
||||
```
|
||||
|
||||
## Building Notes
|
||||
|
||||
The script `make.sh` (described later) will get these files and massage the data
|
||||
@ -289,13 +124,7 @@ which implies that code `0xF6` is `String.fromCharCode(0x02C6)` and vice versa.
|
||||
|
||||
To build the sources on windows, consult `dotnet/MakeEncoding.cs`.
|
||||
|
||||
After saving the standard output to `out`, a simple script processes the result:
|
||||
|
||||
```>dotnet.sh
|
||||
#!/bin/bash
|
||||
# Nothing to do unless the .NET dump has been saved to dotnet/out.
if [ ! -e dotnet/out ]; then exit; fi
|
||||
# Turn spaces into tabs (squeezing repeats), then split the dump into tables:
# a row with more than two fields switches output to codepages/<first field>.TBL,
# and every two-field row is written to the table that is currently selected.
<dotnet/out tr -s ' ' '\t' | awk 'NF>2 {if(outfile) close(outfile); outfile="codepages/" $1 ".TBL"} NF==2 {print > outfile}'
|
||||
```
|
||||
After saving standard output to `out`, the `dotnet.sh` script processes results.
|
||||
|
||||
# Building the script
|
||||
|
||||
@ -304,43 +133,9 @@ generates JS code for encoding and decoding:
|
||||
|
||||
## Raw Codepages
|
||||
|
||||
```>make.njs
|
||||
#!/usr/bin/env node
|
||||
/* Drop the node executable path: argv[0] is this script, argv[1] the codepage index, argv[2] the optional variable name. */
var argv = process.argv.slice(1), fs = require('fs');
|
||||
/* The codepage index is mandatory. */
if(argv.length < 2) {
|
||||
console.error("usage: make.njs <codepage_index> [variable]");
|
||||
process.exit(22); /* EINVAL */
|
||||
}
|
||||
|
||||
var cp/*:string*/ = argv[1];
|
||||
/* Name of the JS variable the generated table is assigned into. */
var jsvar/*:string*/ = argv[2] || "cptable";
|
||||
/* Raw text of the table for this codepage. */
var x/*:string*/ = fs.readFileSync("codepages/" + cp + ".TBL","utf8");
|
||||
/* Largest code seen and loop counters; they are filled in by the code generation that follows this section. */
var maxcp = 0, i = 0, ii = 0;
|
||||
|
||||
/* Parse each line as tab-separated numbers and keep only the rows that have at least two columns, i.e. [code, unicode] pairs. */
var y/*:Array<Array<number> >*/ = x.split("\n").map(function(z/*:string*/)/*:Array<number>*/ {
|
||||
var w/*:Array<string>*/ = z.split("\t");
|
||||
/* Single-column rows are returned with length 1 so the filter below discards them. */
if(w.length < 2) return [Number(w[0])];
|
||||
return [Number(w[0]), Number(w[1])];
|
||||
}).filter(function(z) { return z.length > 1; });
|
||||
```
|
||||
|
||||
The DBCS and SBCS code generation strategies are different. The maximum code is
|
||||
used to distinguish (max `0xFF` for SBCS).
|
||||
|
||||
```
|
||||
for(i = 0; i != y.length; ++i) if(y[i][0] > maxcp) maxcp = y[i][0];
|
||||
|
||||
var enc/*:{[key:string]:number}*/ = {}, dec/*:{[key:string]:string}|Array<string>*/ = (maxcp < 256 ? [] : {});
|
||||
for(i = 0; i != y.length; ++i) {
|
||||
/*:: if(Array.isArray(dec)) */ dec[y[i][0]] = String.fromCharCode(y[i][1]);
|
||||
enc[String.fromCharCode(y[i][1])] = y[i][0];
|
||||
}
|
||||
|
||||
var odec = "", outstr = "";
|
||||
if(maxcp < 256) {
|
||||
/*:: if(Array.isArray(dec)) { */
|
||||
```
|
||||
|
||||
The Unicode character `0xFFFD` (REPLACEMENT CHARACTER) is used as a placeholder
|
||||
for characters that are not specified in the map (for example, `0xF0` is not in
|
||||
code page 10000).
|
||||
@ -348,14 +143,6 @@ code page 10000).
|
||||
For SBCS, the idea is to embed a raw string with the contents of the 256 codes.
|
||||
The `dec` field is merely a split of the string, and `enc` is an eversion:
|
||||
|
||||
```
|
||||
for(i = 0; i != 256; ++i) if(typeof dec[i] === "undefined") dec[i] = String.fromCharCode(0xFFFD);
|
||||
odec = JSON.stringify(dec.join(""));
|
||||
outstr = '(function(){ var d = ' + odec + ', D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();';
|
||||
/*:: } */
|
||||
} else {
|
||||
```
|
||||
|
||||
DBCS is similar, except that the space is sliced in chunks of 256 codes (strings
|
||||
are only generated for those high-bytes represented in the codepage).
|
||||
|
||||
@ -363,27 +150,6 @@ The strategy is to construct an array-of-arrays so that `dd[high][low]` is the
|
||||
character associated with the code. This array is combined at runtime to yield
|
||||
the complete decoding object (and the encoding object is an eversion):
|
||||
|
||||
```
|
||||
var dd = [];
|
||||
/*:: if(!Array.isArray(dec)) { */
|
||||
for(i in dec) if(dec.hasOwnProperty(i)) {
|
||||
ii = +i;
|
||||
if(typeof dd[ii >> 8] === "undefined") dd[ii >> 8] = [];
|
||||
dd[ii >> 8][ii % 256] = dec[i];
|
||||
}
|
||||
/*:: } */
|
||||
outstr = '(function(){ var d = [], e = {}, D = [], j;\n';
|
||||
for(var i = 0; i != 256; ++i) if(dd[i]) {
|
||||
for(var j = 0; j != 256; ++j) if(typeof dd[i][j] === "undefined") dd[i][j] = String.fromCharCode(0xFFFD);
|
||||
outstr += 'D[' + i + '] = ' + JSON.stringify(dd[i].join("")) + '.split("");\n';
|
||||
outstr += 'for(j = 0; j != D[' + i + '].length; ++j) if(D[' + i + '][j].charCodeAt(0) !== 0xFFFD) { e[D[' + i + '][j]] = ' + (i*256) + ' + j; d[' + (i*256) + ' + j] = D[' + i + '][j];}\n'
|
||||
}
|
||||
outstr += 'return {"enc": e, "dec": d }; })();';
|
||||
}
|
||||
process.stdout.write(jsvar + "[" + cp + "] = " + outstr + "\n");
|
||||
|
||||
```
|
||||
|
||||
`make.sh` generates the tables used by `make.njs`. The raw Unicode TXT files
|
||||
are columnar: `code unicode #comments`. For example, the last 10 lines of the
|
||||
text file `ROMAN.TXT` (for CP 10000) are:
|
||||
@ -404,35 +170,6 @@ text file `ROMAN.TXT` (for CP 10000) are:
|
||||
In processing the data, the comments (after the `#`) are stripped and undefined
|
||||
elements (like `0x7F` for CP 10000) are removed.
|
||||
|
||||
```>make.sh
|
||||
#!/bin/bash
|
||||
# usage: make.sh [manifest.csv] [output.js] [jsvar]
# Builds one bits/<cp>.js script per manifest row plus the combined output script.
INFILE=${1:-pages.csv}
|
||||
OUTFILE=${2:-cptable.js}
|
||||
JSVAR=${3:-cptable}
|
||||
# The character set is quoted so the shell cannot glob-expand it against files in the working directory.
VERSION=$(grep version package.json | tr -dc '0-9.')
|
||||
|
||||
mkdir -p codepages bits
|
||||
rm -f $OUTFILE $OUTFILE.tmp
|
||||
# Header of the combined script: banner, jshint pragma and the table object itself.
echo "/* $OUTFILE (C) 2013-present SheetJS -- http://sheetjs.com */" > $OUTFILE.tmp
|
||||
echo "/*jshint -W100 */" >> $OUTFILE.tmp
|
||||
echo "var $JSVAR = {version:\"$VERSION\"};" >> $OUTFILE.tmp
|
||||
if [ -e dotnet.sh ]; then bash dotnet.sh; fi
|
||||
# NOTE(review): rows with an empty url field (e.g. `708,,1`) lose that field when `read`
# splits on whitespace, so `url` receives the third column; such rows depend on
# codepages/<cp>.TBL already existing.
awk -F, '{print $1, $2, $3}' $INFILE | while read cp url cptype; do
|
||||
echo $cp $url
|
||||
# Download the mapping only when no table is cached; strip `#` comments and keep two-column rows.
if [ ! -e codepages/$cp.TBL ]; then
|
||||
curl $url | sed 's/#.*//g' | awk 'NF==2' > codepages/$cp.TBL
|
||||
fi
|
||||
echo "if(typeof $JSVAR === 'undefined') $JSVAR = {};" > bits/$cp.js.tmp
|
||||
# The generated assignment goes both to the standalone bits script and to the combined script.
node make.njs $cp $JSVAR | tee -a bits/$cp.js.tmp >> $OUTFILE.tmp
|
||||
# Unquote numeric object keys ("123": becomes 123:). `+` is an ordinary character in a
# basic regular expression, so "one or more digits" is spelled out as [0-9][0-9]*.
sed 's/"\([0-9][0-9]*\)":/\1:/g' <bits/$cp.js.tmp >bits/$cp.js
|
||||
rm -f bits/$cp.js.tmp
|
||||
done
|
||||
echo "// eslint-disable-next-line no-undef" >> $OUTFILE.tmp
|
||||
echo "if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = $JSVAR;" >> $OUTFILE.tmp
|
||||
# Same key-unquoting pass for the combined script.
sed 's/"\([0-9][0-9]*\)":/\1:/g' <$OUTFILE.tmp >$OUTFILE
|
||||
rm -f $OUTFILE.tmp
|
||||
```
|
||||
|
||||
## Utilities
|
||||
|
||||
The encode and decode functions are kept in a separate script (`cputils.js`).
|
||||
@ -446,412 +183,8 @@ Both encode and decode deal with data represented as:
|
||||
The `ofmt` variable controls `encode` output (`str`, `arr` respectively)
|
||||
while the input format is automatically determined.
|
||||
|
||||
# Tests
|
||||
|
||||
```>test.js
|
||||
var fs = require('fs'), assert = require('assert'), vm = require('vm');
|
||||
var cptable, sbcs;
|
||||
|
||||
```
|
||||
|
||||
Due to a bug in `Buffer.from` in node `4.0 - 4.4`, a special check is needed:
|
||||
|
||||
```>test.js
|
||||
var Buffer_from = function(){};
|
||||
|
||||
if(typeof Buffer !== 'undefined') {
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
The tests include JS validity tests (requiring or evaluating code):
|
||||
|
||||
```>test.js
|
||||
describe('source', function() {
|
||||
it('should load node', function() { cptable = require('./'); });
|
||||
it('should load sbcs', function() { sbcs = require('./sbcs'); });
|
||||
it('should load excel', function() { excel = require('./cpexcel'); });
|
||||
it('should process bits', function() {
|
||||
var files = fs.readdirSync('bits').filter(function(x){return x.substr(-3)==".js";});
|
||||
files.forEach(function(x) {
|
||||
vm.runInThisContext(fs.readFileSync('./bits/' + x));
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The README tests verify the snippets in the README:
|
||||
|
||||
```>test.js
|
||||
describe('README', function() {
|
||||
var readme = function() {
|
||||
var unicode_cp10000_255 = cptable[10000].dec[255]; // ˇ
|
||||
assert.equal(unicode_cp10000_255, "ˇ");
|
||||
|
||||
var cp10000_711 = cptable[10000].enc[String.fromCharCode(711)]; // 255
|
||||
assert.equal(cp10000_711, 255);
|
||||
|
||||
var b1 = [0xbb,0xe3,0xd7,0xdc];
|
||||
var s1 = b1.map(function(x) { return String.fromCharCode(x); }).join("");
|
||||
var 汇总 = cptable.utils.decode(936, b1);
|
||||
var buf = cptable.utils.encode(936, 汇总);
|
||||
assert.equal(汇总,"汇总");
|
||||
assert.equal(buf.length, 4);
|
||||
for(var i = 0; i != 4; ++i) assert.equal(b1[i], buf[i]);
|
||||
|
||||
var b2 = [0xf0,0x9f,0x8d,0xa3];
|
||||
var sushi= cptable.utils.decode(65001, b2);
|
||||
var sbuf = cptable.utils.encode(65001, sushi);
|
||||
assert.equal(sushi,"🍣");
|
||||
assert.equal(sbuf.length, 4);
|
||||
for(var i = 0; i != 4; ++i) assert.equal(b2[i], sbuf[i]);
|
||||
|
||||
};
|
||||
it('should be correct', function() {
|
||||
cptable.utils.cache.encache();
|
||||
readme();
|
||||
cptable.utils.cache.decache();
|
||||
readme();
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The consistency tests make sure that encoding and decoding are pseudo inverses:
|
||||
|
||||
```>test.js
|
||||
describe('consistency', function() {
|
||||
cptable = require('./');
|
||||
U = cptable.utils;
|
||||
var chk = function(cptable, cacheit) { return function(x) {
|
||||
it('should consistently process CP ' + x, function() {
|
||||
var cp = cptable[x], D = cp.dec, E = cp.enc;
|
||||
if(cacheit) cptable.utils.cache.encache();
|
||||
else cptable.utils.cache.decache();
|
||||
Object.keys(D).forEach(function(d) {
|
||||
if(E[D[d]] != d) {
|
||||
if(typeof E[D[d]] !== "undefined") return;
|
||||
if(D[d].charCodeAt(0) == 0xFFFD) return;
|
||||
if(D[E[D[d]]] === D[d]) return;
|
||||
throw new Error(x + " e.d[" + d + "] = " + E[D[d]] + "; d[" + d + "]=" + D[d] + "; d.e.d[" + d + "] = " + D[E[D[d]]]);
|
||||
}
|
||||
});
|
||||
Object.keys(E).forEach(function(e) {
|
||||
if(D[E[e]] != e) {
|
||||
throw new Error(x + " d.e[" + e + "] = " + D[E[e]] + "; e[" + e + "]=" + E[e] + "; e.d.e[" + e + "] = " + E[D[E[e]]]);
|
||||
}
|
||||
});
|
||||
var corpus = ["foobar"];
|
||||
corpus.forEach(function(w){
|
||||
assert.equal(U.decode(x,U.encode(x,w)),w);
|
||||
});
|
||||
cptable.utils.cache.encache();
|
||||
});
|
||||
}; };
|
||||
describe('cached', function() {
|
||||
Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, true));
|
||||
});
|
||||
describe('direct', function() {
|
||||
Object.keys(cptable).filter(function(w) { return w == +w; }).forEach(chk(cptable, false));
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The next tests look at possible entry conditions:
|
||||
|
||||
```
|
||||
describe('entry conditions', function() {
|
||||
it('should fail to load utils if cptable unavailable', function() {
|
||||
var sandbox = {};
|
||||
var ctx = vm.createContext(sandbox);
|
||||
assert.throws(function() {
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
|
||||
});
|
||||
});
|
||||
it('should load utils if cptable is available', function() {
|
||||
var sandbox = {};
|
||||
var ctx = vm.createContext(sandbox);
|
||||
vm.runInContext(fs.readFileSync('cpexcel.js','utf8'),ctx);
|
||||
vm.runInContext(fs.readFileSync('cputils.js','utf8'),ctx);
|
||||
});
|
||||
var chken = function(cp, i) {
|
||||
var c = function(cp, i, e) {
|
||||
var str = cptable.utils.encode(cp,i,e);
|
||||
var arr = cptable.utils.encode(cp,i.split(""),e);
|
||||
assert.deepEqual(str,arr);
|
||||
if(typeof Buffer === 'undefined') return;
|
||||
var buf = cptable.utils.encode(cp,Buffer_from(i),e);
|
||||
assert.deepEqual(str,buf);
|
||||
};
|
||||
cptable.utils.cache.encache();
|
||||
c(cp,i);
|
||||
c(cp,i,'buf');
|
||||
c(cp,i,'arr');
|
||||
c(cp,i,'str');
|
||||
cptable.utils.cache.decache();
|
||||
c(cp,i);
|
||||
c(cp,i,'buf');
|
||||
c(cp,i,'arr');
|
||||
c(cp,i,'str');
|
||||
};
|
||||
describe('encode', function() {
|
||||
it('CP 1252 : sbcs', function() { chken(1252,"foo•bþr"); });
|
||||
it('CP 708 : sbcs', function() { chken(708,"ت and ث smiley faces");});
|
||||
it('CP 936 : dbcs', function() { chken(936, "这是中文字符测试");});
|
||||
});
|
||||
var chkde = function(cp, i) {
|
||||
var c = function(cp, i) {
|
||||
var s;
|
||||
if(typeof Buffer !== 'undefined' && i instanceof Buffer) s = [].map.call(i, function(s){return String.fromCharCode(s); });
|
||||
else s=(i.map) ? i.map(function(s){return String.fromCharCode(s); }) : i;
|
||||
var str = cptable.utils.decode(cp,i);
|
||||
var arr = cptable.utils.decode(cp,s.join?s.join(""):s);
|
||||
assert.deepEqual(str,arr);
|
||||
if(typeof Buffer === 'undefined') return;
|
||||
var buf = cptable.utils.decode(cp,Buffer_from(i));
|
||||
assert.deepEqual(str,buf);
|
||||
};
|
||||
cptable.utils.cache.encache();
|
||||
c(cp,i);
|
||||
cptable.utils.cache.decache();
|
||||
c(cp,i);
|
||||
};
|
||||
describe('decode', function() {
|
||||
it('CP 1252 : sbcs', function() { chkde(1252,[0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72]); }); /* "foobar" */
|
||||
if(typeof Buffer !== 'undefined') it('CP 708 : sbcs', function() { chkde(708, Buffer_from([0xca, 0x20, 0x61, 0x6e, 0x64, 0x20, 0xcb, 0x20, 0x73, 0x6d, 0x69, 0x6c, 0x65, 0x79, 0x20, 0x66, 0x61, 0x63, 0x65, 0x73])); }); /* ("ت and ث smiley faces") */
|
||||
it('CP 936 : dbcs', function() { chkde(936, [0xd5, 0xe2, 0xca, 0xc7, 0xd6, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xb7, 0xfb, 0xb2, 0xe2, 0xca, 0xd4]);}); /* "这是中文字符测试" */
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The `testfile` helper function reads a file and compares to node's read facilities:
|
||||
|
||||
```>test.js
|
||||
function testfile(f,cp,type,skip) {
|
||||
var d = fs.readFileSync(f);
|
||||
var x = fs.readFileSync(f, type);
|
||||
var a = x.split("");
|
||||
var chk = function(cp) {
|
||||
var y = cptable.utils.decode(cp, d);
|
||||
assert.equal(x,y);
|
||||
var z = cptable.utils.encode(cp, x);
|
||||
if(z.length != d.length) throw new Error(f + " " + JSON.stringify(z) + " != " + JSON.stringify(d) + " : " + z.length + " " + d.length);
|
||||
for(var i = 0; i != d.length; ++i) if(d[i] !== z[i]) throw new Error("" + i + " " + d[i] + "!=" + z[i]);
|
||||
if(skip) return;
|
||||
z = cptable.utils.encode(cp, a);
|
||||
if(z.length != d.length) throw new Error(f + " " + JSON.stringify(z) + " != " + JSON.stringify(d) + " : " + z.length + " " + d.length);
|
||||
for(var i = 0; i != d.length; ++i) if(d[i] !== z[i]) throw new Error("" + i + " " + d[i] + "!=" + z[i]);
|
||||
if(f.indexOf("cptable.js") == -1) {
|
||||
cptable.utils.encode(cp, d, 'str');
|
||||
cptable.utils.encode(cp, d, 'arr');
|
||||
}
|
||||
}
|
||||
cptable.utils.cache.encache();
|
||||
chk(cp);
|
||||
if(skip) return;
|
||||
cptable.utils.cache.decache();
|
||||
chk(cp);
|
||||
cptable.utils.cache.encache();
|
||||
}
|
||||
```
|
||||
|
||||
The `utf8` tests verify UTF-8 encoding of the actual JS sources:
|
||||
|
||||
```>test.js
|
||||
describe('node natives', function() {
|
||||
var node = [[65001, 'utf8',1], [1200, 'utf16le',1], [20127, 'ascii',0]];
|
||||
var unicodefiles = ['codepage.md','README.md','cptable.js'];
|
||||
var asciifiles = ['cputils.js'];
|
||||
node.forEach(function(w) {
|
||||
describe(w[1], function() {
|
||||
cptable = require('./');
|
||||
asciifiles.forEach(function(f) {
|
||||
it('should process ' + f, function() { testfile('./misc/'+f+'.'+w[1],w[0],w[1]); });
|
||||
});
|
||||
if(!w[2]) return;
|
||||
unicodefiles.forEach(function(f) {
|
||||
it('should process ' + f, function() { testfile('./misc/'+f+'.'+w[1],w[0],w[1]); });
|
||||
});
|
||||
if(w[1] === 'utf8') it('should process bits', function() {
|
||||
var files = fs.readdirSync('bits').filter(function(x){return x.substr(-3)==".js";});
|
||||
files.forEach(function(f) { testfile('./bits/' + f,w[0],w[1],true); });
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
The `utf*` and `ascii` tests attempt to test other magic formats:
|
||||
|
||||
```>test.js
|
||||
var m = cptable.utils.magic;
|
||||
function cmp(x,z) {
|
||||
assert.equal(x.length, z.length);
|
||||
for(var i = 0; i != z.length; ++i) assert.equal(i+"/"+x.length+""+x[i], i+"/"+z.length+""+z[i]);
|
||||
}
|
||||
Object.keys(m).forEach(function(t){if(t != 16969) describe(m[t], function() {
|
||||
it("should process codepage.md." + m[t], fs.existsSync('./misc/codepage.md.' + m[t]) ?
|
||||
function() {
|
||||
var b = fs.readFileSync('./misc/codepage.md.utf8', "utf8");
|
||||
if(m[t] === "ascii") b = b.replace(/[\u0080-\uffff]*/g,"");
|
||||
var x = fs.readFileSync('./misc/codepage.md.' + m[t]);
|
||||
var y, z;
|
||||
cptable.utils.cache.encache();
|
||||
y = cptable.utils.decode(t, x);
|
||||
assert.equal(y,b);
|
||||
z = cptable.utils.encode(t, y);
|
||||
if(t != 65000) cmp(x,z);
|
||||
else { assert.equal(y, cptable.utils.decode(t, z)); }
|
||||
cptable.utils.cache.decache();
|
||||
y = cptable.utils.decode(t, x);
|
||||
assert.equal(y,b);
|
||||
z = cptable.utils.encode(t, y);
|
||||
if(t != 65000) cmp(x,z);
|
||||
else { assert.equal(y, cptable.utils.decode(t, z)); }
|
||||
cptable.utils.cache.encache();
|
||||
cptable.utils.encode(t, y, 'str');
|
||||
cptable.utils.encode(t, y, 'arr');
|
||||
cptable.utils.cache.decache();
|
||||
cptable.utils.encode(t, y, 'str');
|
||||
cptable.utils.encode(t, y, 'arr');
|
||||
cptable.utils.cache.encache();
|
||||
}
|
||||
: null);
|
||||
it("should process README.md." + m[t], fs.existsSync('./misc/README.md.' + m[t]) ?
|
||||
function() {
|
||||
var b = fs.readFileSync('./misc/README.md.utf8', "utf8");
|
||||
if(m[t] === "ascii") b = b.replace(/[\u0080-\uffff]*/g,"");
|
||||
var x = fs.readFileSync('./misc/README.md.' + m[t]);
|
||||
x = [].slice.call(x);
|
||||
cptable.utils.cache.encache();
|
||||
var y = cptable.utils.decode(t, x);
|
||||
assert.equal(y,b);
|
||||
cptable.utils.cache.decache();
|
||||
var y = cptable.utils.decode(t, x);
|
||||
assert.equal(y,b);
|
||||
cptable.utils.cache.encache();
|
||||
}
|
||||
: null);
|
||||
});});
|
||||
```
|
||||
|
||||
The codepage `6969` is not defined, so operations should fail:
|
||||
|
||||
```>test.js
|
||||
describe('failures', function() {
|
||||
it('should fail to find CP 6969', function() {
|
||||
assert.throws(function(){cptable[6969].dec});
|
||||
assert.throws(function(){cptable[6969].enc});
|
||||
});
|
||||
it('should fail using utils', function() {
|
||||
assert(!cptable.utils.hascp(6969));
|
||||
assert.throws(function(){return cptable.utils.encode(6969, "foobar"); });
|
||||
assert.throws(function(){return cptable.utils.decode(6969, [0x20]); });
|
||||
});
|
||||
it('should fail with black magic', function() {
|
||||
assert(cptable.utils.hascp(16969));
|
||||
assert.throws(function(){return cptable.utils.encode(16969, "foobar"); });
|
||||
assert.throws(function(){return cptable.utils.decode(16969, [0x20]); });
|
||||
});
|
||||
it('should fail when presented with invalid char codes', function() {
|
||||
assert.throws(function(){cptable.utils.cache.decache(); return cptable.utils.encode(20127, [String.fromCharCode(0xAA)]);});
|
||||
});
|
||||
it('should fail to propagate UTF8 BOM in UTF7', function() {
|
||||
["+/v8-abc", "+/v9"].forEach(function(m) { assert.throws(function() {
|
||||
assert.equal(m, cptable.utils.encode(65000, cptable.utils.decode(65000, m)));
|
||||
}); });
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
# Nitty Gritty
|
||||
|
||||
```json>package.json
|
||||
{
|
||||
"name": "codepage",
|
||||
"version": "1.14.0",
|
||||
"author": "SheetJS",
|
||||
"description": "pure-JS library to handle codepages",
|
||||
"keywords": [ "codepage", "iconv", "convert", "strings" ],
|
||||
"bin": {
|
||||
"codepage": "./bin/codepage.njs"
|
||||
},
|
||||
"main": "cputils.js",
|
||||
"types": "types",
|
||||
"browser": {
|
||||
"buffer": "false"
|
||||
},
|
||||
"dependencies": {
|
||||
"commander": "~2.14.1",
|
||||
"exit-on-epipe": "~1.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"voc": "~1.1.0",
|
||||
"mocha": "~2.5.3",
|
||||
"blanket": "~1.2.3",
|
||||
"@sheetjs/uglify-js": "~2.7.3",
|
||||
"@types/node": "^8.0.7",
|
||||
"@types/commander": "^2.12.0",
|
||||
"dtslint": "^0.1.2",
|
||||
"typescript": "2.2.0"
|
||||
},
|
||||
"repository": { "type":"git", "url":"git://github.com/SheetJS/js-codepage.git"},
|
||||
"scripts": {
|
||||
"pretest": "git submodule init && git submodule update",
|
||||
"test": "make test",
|
||||
"build": "make js",
|
||||
"lint": "make fullint",
|
||||
"dtslint": "dtslint types"
|
||||
},
|
||||
"config": {
|
||||
"blanket": {
|
||||
"pattern": "[cputils.js]"
|
||||
}
|
||||
},
|
||||
"alex": {
|
||||
"allow": [
|
||||
"chinese",
|
||||
"european",
|
||||
"german",
|
||||
"japanese",
|
||||
"latin"
|
||||
]
|
||||
},
|
||||
"homepage": "http://sheetjs.com/opensource",
|
||||
"files": [
|
||||
"LICENSE",
|
||||
"README.md",
|
||||
"bin",
|
||||
"bits/*.js",
|
||||
"types/index.d.ts",
|
||||
"types/*.json",
|
||||
"cptable.js",
|
||||
"cputils.js",
|
||||
"dist/sbcs.full.js",
|
||||
"dist/cpexcel.full.js"
|
||||
],
|
||||
"bugs": { "url": "https://github.com/SheetJS/js-codepage/issues" },
|
||||
"license": "Apache-2.0",
|
||||
"engines": { "node": ">=0.8" }
|
||||
}
|
||||
```
|
||||
|
||||
```>.vocrc
|
||||
{ "post": "make js" }
|
||||
```
|
||||
|
||||
```>.gitignore
|
||||
node_modules
|
||||
package-lock.json
|
||||
*.tgz
|
||||
.gitignore
|
||||
codepages/
|
||||
.vocrc
|
||||
make.sh
|
||||
make.njs
|
||||
misc/coverage.html
|
||||
codepage_mini.md
|
||||
ctest/sauce*
|
||||
```
|
||||
|
@ -2,16 +2,22 @@
|
||||
|
||||
The fields of the `pages.csv` manifest are `codepage,url,bytes` (SBCS=1, DBCS=2)
|
||||
|
||||
```>pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private `0xF8FF` code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the behavior, in which case the files are under APPLE and not
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081
|
||||
|
||||
The numbering scheme for the `ISO-8859-X` series is `28590 + X`:
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
|
||||
- 708 Arabic (ASMO 708)
|
||||
- 720 Arabic (Transparent ASMO); Arabic (DOS)
|
||||
- 858 OEM Multilingual Latin 1 + Euro symbol
|
||||
- 870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
|
||||
- 1047 IBM EBCDIC Latin 1/Open System
|
||||
- 1140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
|
||||
- 1141 IBM EBCDIC Germany (20273 + Euro sym
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,16 +2,22 @@
|
||||
|
||||
The fields of the +AGA-pages.csv+AGA manifest are +AGA-codepage,url,bytes+AGA (SBCS+AD0-1, DBCS+AD0-2)
|
||||
|
||||
+AGAAYABgAD4-pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private +AGA-0xF8FF+AGA code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the behavior, in which case the files are under APPLE and not
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081
|
||||
|
||||
The numbering scheme for the +AGA-ISO-8859-X+AGA series is +AGA-28590 +- X+AGA:
|
||||
|
||||
+ACMAIw Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
|
||||
- 708 Arabic (ASMO 708)
|
||||
- 720 Arabic (Transparent ASMO)+ADs Arabic (DOS)
|
||||
- 858 OEM Multilingual Latin 1 +- Euro symbol
|
||||
- 870 IBM EBCDIC Multilingual/ROECE (Latin 2)+ADs IBM EBCDIC Multilingual Latin 2
|
||||
- 1047 IBM EBCDIC Latin 1/Open System
|
||||
- 1140 IBM EBCDIC US-Canada (037 +- Euro symbol)+ADs IBM EBCDIC (US-Canada-Euro)
|
||||
- 1141 IBM EBCDIC Germany (20273 +- Euro sym
|
@ -2,16 +2,22 @@
|
||||
|
||||
The fields of the `pages.csv` manifest are `codepage,url,bytes` (SBCS=1, DBCS=2)
|
||||
|
||||
```>pages.csv
|
||||
37,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT,1
|
||||
437,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,1
|
||||
500,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT,1
|
||||
737,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT,1
|
||||
775,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT,1
|
||||
850,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT,1
|
||||
852,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT,1
|
||||
855,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT,1
|
||||
857,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT,1
|
||||
860,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT,1
|
||||
861,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT,1
|
||||
862,http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT,1
|
||||
Note that the Windows rendering is used for the Mac code pages. The primary
|
||||
difference is the use of the private `0xF8FF` code (which renders as an Apple
|
||||
logo on macs but as garbage on other operating systems). It may be desirable
|
||||
to fall back to the behavior, in which case the files are under APPLE and not
|
||||
MICSFT. This affects codepages 10000, 10006, 10007, 10029, 10079, 10081
|
||||
|
||||
The numbering scheme for the `ISO-8859-X` series is `28590 + X`:
|
||||
|
||||
## Generated Codepages
|
||||
|
||||
The following codepages are available in .NET on Windows:
|
||||
|
||||
- 708 Arabic (ASMO 708)
|
||||
- 720 Arabic (Transparent ASMO); Arabic (DOS)
|
||||
- 858 OEM Multilingual Latin 1 + Euro symbol
|
||||
- 870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
|
||||
- 1047 IBM EBCDIC Latin 1/Open System
|
||||
- 1140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
|
||||
- 1141 IBM EBCDIC Germany (20273 + Euro sym
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1700
misc/cptable.js.utf7
1700
misc/cptable.js.utf7
File diff suppressed because it is too large
Load Diff
1700
misc/cptable.js.utf8
1700
misc/cptable.js.utf8
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
@ -40,11 +40,11 @@
|
||||
var cca = function cca(x) { return x.charCodeAt(0); };
|
||||
|
||||
var has_buf = (typeof Buffer !== 'undefined');
|
||||
var Buffer_from = function(){};
|
||||
var Buffer_from = function(){};
|
||||
if(has_buf) {
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
// $FlowIgnore
|
||||
if(!Buffer.allocUnsafe) Buffer.allocUnsafe = function(n) { return new Buffer(n); };
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,4 +1,4 @@
|
||||
/+ACo cputils.js (C) 2013-present SheetJS -- http://sheetjs.com +ACo-/
|
||||
/+ACoAIQ cputils.js (C) 2013-present SheetJS -- http://sheetjs.com +ACo-/
|
||||
/+ACo vim: set ft+AD0-javascript: +ACo-/
|
||||
/+ACo-jshint newcap: false +ACo-/
|
||||
(function(root, factory) +AHs
|
||||
@ -40,11 +40,11 @@
|
||||
var cca +AD0 function cca(x) +AHs return x.charCodeAt(0)+ADs +AH0AOw
|
||||
|
||||
var has+AF8-buf +AD0 (typeof Buffer +ACEAPQA9 'undefined')+ADs
|
||||
var Buffer+AF8-from +AD0 function()+AHsAfQA7
|
||||
var Buffer+AF8-from +AD0 function()+AHsAfQA7
|
||||
if(has+AF8-buf) +AHs
|
||||
var nbfs +AD0 +ACE-Buffer.from+ADs
|
||||
if(+ACE-nbfs) try +AHs Buffer.from(+ACI-foo+ACI, +ACI-utf8+ACI)+ADs +AH0 catch(e) +AHs nbfs +AD0 true+ADs +AH0
|
||||
Buffer+AF8-from +AD0 nbfs ? function(buf, enc) +AHs return (enc) ? new Buffer(buf, enc) : new Buffer(buf)+ADs +AH0 : Buffer.from.bind(Buffer)+ADs
|
||||
var nbfs +AD0 +ACE-Buffer.from+ADs
|
||||
if(+ACE-nbfs) try +AHs Buffer.from(+ACI-foo+ACI, +ACI-utf8+ACI)+ADs +AH0 catch(e) +AHs nbfs +AD0 true+ADs +AH0
|
||||
Buffer+AF8-from +AD0 nbfs ? function(buf, enc) +AHs return (enc) ? new Buffer(buf, enc) : new Buffer(buf)+ADs +AH0 : Buffer.from.bind(Buffer)+ADs
|
||||
// +ACQ-FlowIgnore
|
||||
if(+ACE-Buffer.allocUnsafe) Buffer.allocUnsafe +AD0 function(n) +AHs return new Buffer(n)+ADs +AH0AOw
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* vim: set ft=javascript: */
|
||||
/*jshint newcap: false */
|
||||
(function(root, factory) {
|
||||
@ -40,11 +40,11 @@
|
||||
var cca = function cca(x) { return x.charCodeAt(0); };
|
||||
|
||||
var has_buf = (typeof Buffer !== 'undefined');
|
||||
var Buffer_from = function(){};
|
||||
var Buffer_from = function(){};
|
||||
if(has_buf) {
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
var nbfs = !Buffer.from;
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
// $FlowIgnore
|
||||
if(!Buffer.allocUnsafe) Buffer.allocUnsafe = function(n) { return new Buffer(n); };
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "codepage",
|
||||
"version": "1.14.0",
|
||||
"version": "1.15.0",
|
||||
"author": "SheetJS",
|
||||
"description": "pure-JS library to handle codepages",
|
||||
"keywords": [
|
||||
@ -49,7 +49,7 @@
|
||||
"latin"
|
||||
]
|
||||
},
|
||||
"homepage": "http://sheetjs.com/opensource",
|
||||
"homepage": "https://sheetjs.com/",
|
||||
"files": [
|
||||
"LICENSE",
|
||||
"README.md",
|
||||
|
@ -121,11 +121,6 @@
|
||||
21866,,1
|
||||
29001,,1
|
||||
38598,,1
|
||||
50220,,2
|
||||
50221,,2
|
||||
50222,,2
|
||||
50225,,2
|
||||
50227,,2
|
||||
51932,,2
|
||||
51936,,2
|
||||
51949,,2
|
||||
|
|
4
sbcs.js
4
sbcs.js
@ -1,6 +1,6 @@
|
||||
/* sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*! sbcs.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/*jshint -W100 */
|
||||
var cptable = {version:"1.14.0"};
|
||||
var cptable = {version:"1.15.0"};
|
||||
cptable[37] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
cptable[500] = (function(){ var d = "\u0000\u0001\u0002\u0003\t\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013
\b\u0018\u0019\u001c\u001d\u001e\u001f\n\u0017\u001b\u0005\u0006\u0007\u0016\u0004\u0014\u0015\u001a âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })();
|
||||
|
2
test.js
2
test.js
@ -1,5 +1,6 @@
|
||||
var fs = require('fs'), assert = require('assert'), vm = require('vm');
|
||||
var cptable, sbcs;
|
||||
|
||||
var Buffer_from = function(){};
|
||||
|
||||
if(typeof Buffer !== 'undefined') {
|
||||
@ -7,6 +8,7 @@ if(typeof Buffer !== 'undefined') {
|
||||
if(!nbfs) try { Buffer.from("foo", "utf8"); } catch(e) { nbfs = true; }
|
||||
Buffer_from = nbfs ? function(buf, enc) { return (enc) ? new Buffer(buf, enc) : new Buffer(buf); } : Buffer.from.bind(Buffer);
|
||||
}
|
||||
|
||||
describe('source', function() {
|
||||
it('should load node', function() { cptable = require('./'); });
|
||||
it('should load sbcs', function() { sbcs = require('./sbcs'); });
|
||||
|
@ -9,6 +9,7 @@
|
||||
"paths": { "codepage": ["."] },
|
||||
"types": [],
|
||||
"noEmit": true,
|
||||
"strictFunctionTypes": true,
|
||||
"forceConsistentCasingInFileNames": true
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user