minor changes

This commit is contained in:
Dmitry Shirokov 2013-08-06 11:35:58 +10:00
parent 17efdf03be
commit b60a93a538
4 changed files with 22 additions and 24 deletions

View File

@ -8,12 +8,12 @@ var util = require('util'),
module.exports.UTF_16BE = function() { module.exports.UTF_16BE = function() {
this.name = function() { this.name = function() {
return "UTF-16BE"; return 'UTF-16BE';
}; };
this.match = function(det) { this.match = function(det) {
var input = det.fRawInput; var input = det.fRawInput;
if (input.length>=2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) { if (input.length >= 2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) {
var confidence = 100; var confidence = 100;
return new Match(det, this, confidence); return new Match(det, this, confidence);
} }
@ -25,14 +25,14 @@ module.exports.UTF_16BE = function() {
module.exports.UTF_16LE = function() { module.exports.UTF_16LE = function() {
this.name = function() { this.name = function() {
return "UTF-16LE"; return 'UTF-16LE';
}; };
this.match = function(det) { this.match = function(det) {
var input = det.fRawInput; var input = det.fRawInput;
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) { if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) {
// An LE BOM is present. // An LE BOM is present.
if (input.length>=4 && input[2] == 0x00 && input[3] == 0x00) { if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00) {
// It is probably UTF-32 LE, not UTF-16 // It is probably UTF-32 LE, not UTF-16
return null; return null;
} }
@ -74,9 +74,9 @@ UTF_32.prototype.match = function(det) {
// Cook up some sort of confidence score, based on presence of a BOM // Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences. // and the existence of valid and/or invalid multi-byte sequences.
if (hasBOM && numInvalid==0) { if (hasBOM && numInvalid == 0) {
confidence = 100; confidence = 100;
} else if (hasBOM && numValid > numInvalid*10) { } else if (hasBOM && numValid > numInvalid * 10) {
confidence = 80; confidence = 80;
} else if (numValid > 3 && numInvalid == 0) { } else if (numValid > 3 && numInvalid == 0) {
confidence = 100; confidence = 100;
@ -93,7 +93,7 @@ UTF_32.prototype.match = function(det) {
module.exports.UTF_32BE = function() { module.exports.UTF_32BE = function() {
this.name = function() { this.name = function() {
return "UTF-32BE"; return 'UTF-32BE';
}; };
this.getChar = function(input, index) { this.getChar = function(input, index) {
return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 | return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
@ -104,7 +104,7 @@ util.inherits(module.exports.UTF_32BE, UTF_32);
module.exports.UTF_32LE = function() { module.exports.UTF_32LE = function() {
this.name = function() { this.name = function() {
return "UTF-32LE"; return 'UTF-32LE';
}; };
this.getChar = function(input, index) { this.getChar = function(input, index) {
return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 | return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |

View File

@ -6,7 +6,7 @@ var Match = require ('../match');
*/ */
module.exports = function() { module.exports = function() {
this.name = function() { this.name = function() {
return "UTF-8"; return 'UTF-8';
}; };
this.match = function(det) { this.match = function(det) {
@ -48,7 +48,7 @@ module.exports = function() {
// Verify that we've got the right number of trail bytes in the sequence // Verify that we've got the right number of trail bytes in the sequence
for (;;) { for (;;) {
i++; i++;
if (i>=det.fRawLength) { if (i >= det.fRawLength) {
break; break;
} }
b = input[i]; b = input[i];
@ -66,9 +66,9 @@ module.exports = function() {
// Cook up some sort of confidence score, based on presence of a BOM // Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences. // and the existence of valid and/or invalid multi-byte sequences.
confidence = 0; confidence = 0;
if (hasBOM && numInvalid==0) { if (hasBOM && numInvalid == 0) {
confidence = 100; confidence = 100;
} else if (hasBOM && numValid > numInvalid*10) { } else if (hasBOM && numValid > numInvalid * 10) {
confidence = 80; confidence = 80;
} else if (numValid > 3 && numInvalid == 0) { } else if (numValid > 3 && numInvalid == 0) {
confidence = 100; confidence = 100;
@ -77,7 +77,7 @@ module.exports = function() {
} else if (numValid == 0 && numInvalid == 0) { } else if (numValid == 0 && numInvalid == 0) {
// Plain ASCII. // Plain ASCII.
confidence = 10; confidence = 10;
} else if (numValid > numInvalid*10) { } else if (numValid > numInvalid * 10) {
// Probably corrupt utf-8 data. Valid sequences aren't likely by chance. // Probably corrupt utf-8 data. Valid sequences aren't likely by chance.
confidence = 25; confidence = 25;
} }

View File

@ -62,27 +62,24 @@ module.exports.detect = function(buffer) {
fInputLen: buffer.length fInputLen: buffer.length
}; };
var matches = recognisers.map(function(rec) { var match = recognisers.map(function(rec) {
return rec.match(context); return rec.match(context);
}).filter(function(match) { }).filter(function(match) {
return !!match; return !!match;
}); }).sort(function(a, b) {
matches.sort(function(a, b) {
return a.confidence - b.confidence; return a.confidence - b.confidence;
}); }).pop();
return matches.length ? matches.pop().name : null; return match ? match.name : null;
}; };
module.exports.detectFile = function(filepath, fn) { module.exports.detectFile = function(filepath, fn) {
fs.readFile(filepath, function(err, res) { fs.readFile(filepath, function(err, res) {
if (err) if (err) return fn(err, null);
return fn(err, null);
fn(null, self.detect(res)); fn(null, self.detect(res));
}); });
}; };
module.exports.detectFileSync = function(filepath) { module.exports.detectFileSync = function(filepath) {
return self.detect(fs.readFileSync(filepath)); return self.detect(fs.readFileSync(filepath));
}; };

View File

@ -1,6 +1,6 @@
{ {
"name": "chardet", "name": "chardet",
"version": "0.0.6", "version": "0.0.7",
"homepage": "https://github.com/runk/node-chardet", "homepage": "https://github.com/runk/node-chardet",
"description": "Character detector", "description": "Character detector",
"keywords": [ "keywords": [
@ -34,5 +34,6 @@
"readmeFilename": "README.md", "readmeFilename": "README.md",
"directories": { "directories": {
"test": "test" "test": "test"
} },
"license" : "MIT"
} }