Incorrect crc returned for a surrogate character #5

Closed
opened 2016-01-10 14:15:11 +00:00 by florentbr · 1 comment
florentbr commented 2016-01-10 14:15:11 +00:00 (Migrated from github.com)

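The CRC is computed incorrectly for characters outside the Basic Multilingual Plane (code points above U+FFFF), which JavaScript strings store as UTF-16 surrogate pairs.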
Here is an example with the U+24B62 character (𤭢):

CRC32.str('𤭢')

Result: -40863161
Expected: 1512193127

Function related to this issue:

/**
 * Table-driven checksum over the UTF-8 bytes of a JavaScript string.
 * This is the DEFECTIVE version quoted in the report; it is kept verbatim.
 *
 * Each UTF-16 code unit read with `charCodeAt` is expanded into 1-4 UTF-8
 * bytes, and every byte is folded into the running value with the same step:
 *     crc = (crc >>> 8) ^ table[(crc ^ byte) & 0xFF]
 *
 * NOTE(review): `table` is not defined in this snippet; presumably it is the
 * library's 256-entry CRC-32 lookup table -- confirm against the module.
 *
 * @param {string} str text to checksum
 * @returns {number} the running value XORed with -1 (a signed 32-bit integer)
 */
function crc32_str(str) {
    // crc starts with all bits set (-1); `i` is advanced inside the body.
    for(var crc = -1, i = 0, L=str.length, c, d; i < L;) {
        c = str.charCodeAt(i++);
        if(c < 0x80) {
            // 1 byte: 0xxxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ c) & 0xFF];
        } else if(c < 0x800) {
            // 2 bytes: 110xxxxx 10xxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ (192|((c>>6)&31))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        } else if(c >= 0xD800 && c < 0xE000) {
            // Surrogate code unit: the next unit is consumed as its partner
            // without checking that it really is a low surrogate.
            // c = upper bits of the code point: the 10 payload bits of the
            //     first unit plus 64 (the 0x10000 offset, pre-shifted by 10).
            // d = lower 10 bits of the code point, from the second unit.
            c = (c&1023)+64; d = str.charCodeAt(i++) & 1023;
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF];
            // BUG: the third byte needs the low two bits of c in bit positions
            // 5..4, i.e. ((c&3)<<4). Without the shift they overlap the four
            // bits taken from d, so the byte is wrong for astral characters
            // (e.g. U+24B62 yields -40863161 instead of 1512193127).
            crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF];
        } else {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        }
    }
    // Final inversion of all bits.
    return crc ^ -1;
}

Fix:

/**
 * Compute a table-driven CRC over the UTF-8 encoding of a JavaScript string.
 *
 * The string is walked one UTF-16 code unit at a time. Each code point is
 * expanded into its 1-4 UTF-8 bytes, and every byte is folded into the
 * running value with `crc = (crc >>> 8) ^ table[(crc ^ byte) & 0xFF]`.
 *
 * Surrogate handling: a high surrogate (0xD800-0xDBFF) immediately followed
 * by a low surrogate (0xDC00-0xDFFF) is combined into one code point and
 * emitted as four bytes. Any unpaired surrogate is hashed as U+FFFD
 * (bytes EF BF BD), which is what `TextEncoder` produces, and the code unit
 * after it is NOT consumed.
 *
 * NOTE(review): `table` must be in scope where this function is defined;
 * presumably it is the library's 256-entry CRC-32 lookup table.
 *
 * @param {string} str text to checksum
 * @returns {number} checksum as a signed 32-bit integer
 */
function crc32_str(str) {
    for(var crc = -1, i = 0, L = str.length, c, d; i < L;) {
        c = str.charCodeAt(i++);
        if(c < 0x80) {
            // 1 byte: 0xxxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ c) & 0xFF];
        } else if(c < 0x800) {
            // 2 bytes: 110xxxxx 10xxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ (192|((c>>6)&31))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        } else {
            if(c >= 0xD800 && c < 0xE000) {
                // Peek (do not consume yet) at the partner unit; only a high
                // surrogate with something after it can start a valid pair.
                d = (c < 0xDC00 && i < L) ? str.charCodeAt(i) : 0;
                if(d >= 0xDC00 && d < 0xE000) {
                    // Valid pair: rebuild the full code point (>= 0x10000).
                    c = (((c&1023) << 10)|(d&1023)) + 0x10000;
                    ++i;
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    crc = (crc >>> 8) ^ table[(crc ^ (240|(c>>18))) & 0xFF];
                    crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>12)&63))) & 0xFF];
                    crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
                    crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
                    continue;
                }
                // Unpaired surrogate: hash the replacement character instead
                // of swallowing the next code unit.
                c = 0xFFFD;
            }
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        }
    }
    return crc ^ -1;
}
The crc for surrogate characters is not correctly computed.
Here is an example with the U+24B62 character (𤭢):

CRC32.str('𤭢')

Result: -40863161
Expected: 1512193127

Function related to this issue:

``` javascript
function crc32_str(str) {
    for(var crc = -1, i = 0, L=str.length, c, d; i < L;) {
        c = str.charCodeAt(i++);
        if(c < 0x80) {
            crc = (crc >>> 8) ^ table[(crc ^ c) & 0xFF];
        } else if(c < 0x800) {
            crc = (crc >>> 8) ^ table[(crc ^ (192|((c>>6)&31))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        } else if(c >= 0xD800 && c < 0xE000) {
            c = (c&1023)+64; d = str.charCodeAt(i++) & 1023;
            crc = (crc >>> 8) ^ table[(crc ^ (240|((c>>8)&7))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>2)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(d&63))) & 0xFF];
        } else {
            crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        }
    }
    return crc ^ -1;
}
```

Fix:

``` javascript
function crc32_str(str) {
    for(var crc = -1, i = 0, L=str.length, c; i < L;) {
        c = str.charCodeAt(i++);
        if(c < 0x80) {
            crc = (crc >>> 8) ^ table[(crc ^ c) & 0xFF];
        } else if(c < 0x800) {
            crc = (crc >>> 8) ^ table[(crc ^ (192|((c>>6)&31))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        } else if(c >= 0xD800 && c < 0xE000) {
            c = (((c&1023) << 10)|((str.charCodeAt(i++) & 1023))) + 0x10000;
            crc = (crc >>> 8) ^ table[(crc ^ (240|(c>>18))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>12)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        } else {
            crc = (crc >>> 8) ^ table[(crc ^ (224|((c>>12)&15))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|((c>>6)&63))) & 0xFF];
            crc = (crc >>> 8) ^ table[(crc ^ (128|(c&63))) & 0xFF];
        }
    }
    return crc ^ -1;
}
```
SheetJSDev commented 2016-01-13 17:20:45 +00:00 (Migrated from github.com)

@florentbr thanks for checking in!

A bitshift was missing. Here is the fix:

-                       crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF];
+                       crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF];

To make sure we have better test coverage, there are now some new tests that check each UTF-8 character.

@florentbr thanks for checking in!

A bitshift was missing. Here is the fix:

``` diff
-                       crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|(c&3))) & 0xFF];
+                       crc = (crc >>> 8) ^ table[(crc ^ (128|((d>>6)&15)|((c&3)<<4))) & 0xFF];
```

To make sure we have better test coverage, there are some new tests checking each UTF-8 character
Sign in to join this conversation.
No Milestone
No project
No Assignees
1 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: sheetjs/js-crc32#5
No description provided.