From 9c43af4fc8967000c8ef28a6f27d4df54d901c9f Mon Sep 17 00:00:00 2001 From: Dmitry Shirokov Date: Fri, 16 Aug 2013 14:54:08 +1000 Subject: [PATCH] cleanup utf8 --- encoding/utf8.js | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/encoding/utf8.js b/encoding/utf8.js index 4203bba..dbc93c7 100644 --- a/encoding/utf8.js +++ b/encoding/utf8.js @@ -14,7 +14,6 @@ module.exports = function() { numValid = 0, numInvalid = 0, input = det.fRawInput, - i, trailBytes = 0, confidence; @@ -24,11 +23,11 @@ module.exports = function() { } // Scan for multi-byte sequences - for (i=0; i numInvalid * 10) { + else if (hasBOM && numValid > numInvalid * 10) confidence = 80; - } else if (numValid > 3 && numInvalid == 0) { + else if (numValid > 3 && numInvalid == 0) confidence = 100; - } else if (numValid > 0 && numInvalid == 0) { + else if (numValid > 0 && numInvalid == 0) confidence = 80; - } else if (numValid == 0 && numInvalid == 0) { + else if (numValid == 0 && numInvalid == 0) // Plain ASCII. confidence = 10; - } else if (numValid > numInvalid * 10) { + else if (numValid > numInvalid * 10) // Probably corruput utf-8 data. Valid sequences aren't likely by chance. confidence = 25; - } - return confidence == 0 ? null : new Match(det, this, confidence); + else + return null + + return new Match(det, this, confidence); }; };