Clean up the code: lowercase hex literals, drop braces around single-statement ifs, and inline a temporary assignment

2013-08-22 09:39:36 +10:00 · 2013-08-22 09:39:36 +10:00 · 7bd721b1d7
parent d084074d0c
commit 7bd721b1d7
3 changed files with 13 additions and 15 deletions
--- a/encoding/unicode.js
+++ b/encoding/unicode.js
@ -12,7 +12,7 @@ module.exports.UTF_16BE = function() {
    this.match = function(det) {
        var input = det.fRawInput;
-        if (input.length >= 2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF))
+        if (input.length >= 2 && ((input[0] & 0xff) == 0xfe && (input[1] & 0xff) == 0xff))
            return new Match(det, this, confidence = 100);
        // TODO: Do some statistics to check for unsigned UTF-16BE
@ -27,7 +27,7 @@ module.exports.UTF_16LE = function() {
    this.match = function(det) {
        var input = det.fRawInput;
-        if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) {
+        if (input.length >= 2 && ((input[0] & 0xff) == 0xff && (input[1] & 0xff) == 0xfe)) {
           // An LE BOM is present.
           if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00)
               // It is probably UTF-32 LE, not UTF-16
@ -89,8 +89,8 @@ module.exports.UTF_32BE = function() {
        return 'UTF-32BE';
    };
    this.getChar = function(input, index) {
-        return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
+        return (input[index + 0] & 0xff) << 24 | (input[index + 1] & 0xff) << 16 |
-               (input[index + 2] & 0xFF) <<  8 | (input[index + 3] & 0xFF);
+               (input[index + 2] & 0xff) <<  8 | (input[index + 3] & 0xff);
    };
 };
 util.inherits(module.exports.UTF_32BE, UTF_32);
@ -100,8 +100,8 @@ module.exports.UTF_32LE = function() {
        return 'UTF-32LE';
    };
    this.getChar = function(input, index) {
-        return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |
+        return (input[index + 3] & 0xff) << 24 | (input[index + 2] & 0xff) << 16 |
-               (input[index + 1] & 0xFF) <<  8 | (input[index + 0] & 0xFF);
+               (input[index + 1] & 0xff) <<  8 | (input[index + 0] & 0xff);
    };
 };
 util.inherits(module.exports.UTF_32LE, UTF_32);
--- a/encoding/utf8.js
+++ b/encoding/utf8.js
@ -18,7 +18,7 @@ module.exports = function() {
            confidence;
        if (det.fRawLength >= 3 &&
-            (input[0] & 0xFF) == 0xef && (input[1] & 0xFF) == 0xbb && (input[2] & 0xFF) == 0xbf) {
+            (input[0] & 0xff) == 0xef && (input[1] & 0xff) == 0xbb && (input[2] & 0xff) == 0xbf) {
            hasBOM = true;
        }
@ -28,7 +28,6 @@ module.exports = function() {
            if ((b & 0x80) == 0)
                continue; // ASCII
            // Hi bit on char found.  Figure out how long the sequence should be
            if ((b & 0x0e0) == 0x0c0) {
                trailBytes = 1;
@ -38,20 +37,18 @@ module.exports = function() {
                trailBytes = 3;
            } else {
                numInvalid++;
-                if (numInvalid > 5) {
+                if (numInvalid > 5)
                    break;
                }
                trailBytes = 0;
            }
            // Verify that we've got the right number of trail bytes in the sequence
            for (;;) {
                i++;
-                if (i >= det.fRawLength) {
+                if (i >= det.fRawLength)
                    break;
-                }
+
-                b = input[i];
+                if ((input[i] & 0xc0) != 0x080) {
                if ((b & 0xc0) != 0x080) {
                    numInvalid++;
                    break;
                }
--- a/match.js
+++ b/match.js
@ -2,4 +2,5 @@
 module.exports = function(det, rec, confidence, name, lang) {
    this.confidence = confidence;
    this.name       = name || rec.name(det);
    this.lang       = lang;
 };