diff --git a/encoding/unicode.js b/encoding/unicode.js index 9e287cc..bbf77b3 100644 --- a/encoding/unicode.js +++ b/encoding/unicode.js @@ -12,7 +12,7 @@ module.exports.UTF_16BE = function() { this.match = function(det) { var input = det.fRawInput; - if (input.length >= 2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) + if (input.length >= 2 && ((input[0] & 0xff) == 0xfe && (input[1] & 0xff) == 0xff)) return new Match(det, this, confidence = 100); // TODO: Do some statistics to check for unsigned UTF-16BE @@ -27,7 +27,7 @@ module.exports.UTF_16LE = function() { this.match = function(det) { var input = det.fRawInput; - if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) { + if (input.length >= 2 && ((input[0] & 0xff) == 0xff && (input[1] & 0xff) == 0xfe)) { // An LE BOM is present. if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00) // It is probably UTF-32 LE, not UTF-16 @@ -89,8 +89,8 @@ module.exports.UTF_32BE = function() { return 'UTF-32BE'; }; this.getChar = function(input, index) { - return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 | - (input[index + 2] & 0xFF) << 8 | (input[index + 3] & 0xFF); + return (input[index + 0] & 0xff) << 24 | (input[index + 1] & 0xff) << 16 | + (input[index + 2] & 0xff) << 8 | (input[index + 3] & 0xff); }; }; util.inherits(module.exports.UTF_32BE, UTF_32); @@ -100,8 +100,8 @@ module.exports.UTF_32LE = function() { return 'UTF-32LE'; }; this.getChar = function(input, index) { - return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 | - (input[index + 1] & 0xFF) << 8 | (input[index + 0] & 0xFF); + return (input[index + 3] & 0xff) << 24 | (input[index + 2] & 0xff) << 16 | + (input[index + 1] & 0xff) << 8 | (input[index + 0] & 0xff); }; }; util.inherits(module.exports.UTF_32LE, UTF_32); diff --git a/encoding/utf8.js b/encoding/utf8.js index dbc93c7..b9df2d8 100644 --- a/encoding/utf8.js +++ b/encoding/utf8.js @@ -18,7 +18,7 @@ module.exports = function() { confidence; if (det.fRawLength >= 3 && - (input[0] & 0xFF) == 0xef && (input[1] & 0xFF) == 0xbb && (input[2] & 0xFF) == 0xbf) { + (input[0] & 0xff) == 0xef && (input[1] & 0xff) == 0xbb && (input[2] & 0xff) == 0xbf) { hasBOM = true; } @@ -28,7 +28,6 @@ module.exports = function() { if ((b & 0x80) == 0) continue; // ASCII - // Hi bit on char found. Figure out how long the sequence should be if ((b & 0x0e0) == 0x0c0) { trailBytes = 1; @@ -38,20 +37,18 @@ module.exports = function() { trailBytes = 3; } else { numInvalid++; - if (numInvalid > 5) { + if (numInvalid > 5) break; - } trailBytes = 0; } // Verify that we've got the right number of trail bytes in the sequence for (;;) { i++; - if (i >= det.fRawLength) { + if (i >= det.fRawLength) break; - } - b = input[i]; - if ((b & 0xc0) != 0x080) { + + if ((input[i] & 0xc0) != 0x080) { numInvalid++; break; } diff --git a/match.js b/match.js index 5516267..5b5012e 100644 --- a/match.js +++ b/match.js @@ -2,4 +2,5 @@ module.exports = function(det, rec, confidence, name, lang) { this.confidence = confidence; this.name = name || rec.name(det); -}; \ No newline at end of file + this.lang = lang; +};