minor changes

This commit is contained in:
Dmitry Shirokov 2013-08-06 11:35:58 +10:00
parent 17efdf03be
commit b60a93a538
4 changed files with 22 additions and 24 deletions

View File

@ -8,12 +8,12 @@ var util = require('util'),
module.exports.UTF_16BE = function() {
this.name = function() {
return "UTF-16BE";
return 'UTF-16BE';
};
this.match = function(det) {
var input = det.fRawInput;
if (input.length>=2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) {
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) {
var confidence = 100;
return new Match(det, this, confidence);
}
@ -25,14 +25,14 @@ module.exports.UTF_16BE = function() {
module.exports.UTF_16LE = function() {
this.name = function() {
return "UTF-16LE";
return 'UTF-16LE';
};
this.match = function(det) {
var input = det.fRawInput;
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) {
// An LE BOM is present.
if (input.length>=4 && input[2] == 0x00 && input[3] == 0x00) {
if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00) {
// It is probably UTF-32 LE, not UTF-16
return null;
}
@ -74,9 +74,9 @@ UTF_32.prototype.match = function(det) {
// Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
if (hasBOM && numInvalid==0) {
if (hasBOM && numInvalid == 0) {
confidence = 100;
} else if (hasBOM && numValid > numInvalid*10) {
} else if (hasBOM && numValid > numInvalid * 10) {
confidence = 80;
} else if (numValid > 3 && numInvalid == 0) {
confidence = 100;
@ -93,7 +93,7 @@ UTF_32.prototype.match = function(det) {
module.exports.UTF_32BE = function() {
this.name = function() {
return "UTF-32BE";
return 'UTF-32BE';
};
this.getChar = function(input, index) {
return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
@ -104,7 +104,7 @@ util.inherits(module.exports.UTF_32BE, UTF_32);
module.exports.UTF_32LE = function() {
this.name = function() {
return "UTF-32LE";
return 'UTF-32LE';
};
this.getChar = function(input, index) {
return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |

View File

@ -6,7 +6,7 @@ var Match = require ('../match');
*/
module.exports = function() {
this.name = function() {
return "UTF-8";
return 'UTF-8';
};
this.match = function(det) {
@ -48,7 +48,7 @@ module.exports = function() {
// Verify that we've got the right number of trail bytes in the sequence
for (;;) {
i++;
if (i>=det.fRawLength) {
if (i >= det.fRawLength) {
break;
}
b = input[i];
@ -66,9 +66,9 @@ module.exports = function() {
// Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
confidence = 0;
if (hasBOM && numInvalid==0) {
if (hasBOM && numInvalid == 0) {
confidence = 100;
} else if (hasBOM && numValid > numInvalid*10) {
} else if (hasBOM && numValid > numInvalid * 10) {
confidence = 80;
} else if (numValid > 3 && numInvalid == 0) {
confidence = 100;
@ -77,7 +77,7 @@ module.exports = function() {
} else if (numValid == 0 && numInvalid == 0) {
// Plain ASCII.
confidence = 10;
} else if (numValid > numInvalid*10) {
} else if (numValid > numInvalid * 10) {
// Probably corrupt UTF-8 data. Valid sequences aren't likely by chance.
confidence = 25;
}

View File

@ -62,27 +62,24 @@ module.exports.detect = function(buffer) {
fInputLen: buffer.length
};
var matches = recognisers.map(function(rec) {
var match = recognisers.map(function(rec) {
return rec.match(context);
}).filter(function(match) {
return !!match;
});
matches.sort(function(a, b) {
}).sort(function(a, b) {
return a.confidence - b.confidence;
});
}).pop();
return matches.length ? matches.pop().name : null;
return match ? match.name : null;
};
/**
 * Reads the file at `filepath` with fs.readFile and reports the detection
 * result through a Node-style callback.
 *
 * @param filepath - Path passed straight to fs.readFile.
 * @param {Function} fn - Called as fn(err, null) when the read fails,
 *     otherwise as fn(null, result) where result is whatever
 *     self.detect returns for the file contents.
 */
module.exports.detectFile = function(filepath, fn) {
fs.readFile(filepath, function(err, res) {
// NOTE(review): the two guards below appear to be the pre- and post-change
// spelling of the same check, shown together by the diff view; either one
// alone stops processing on a read error.
if (err)
return fn(err, null);
if (err) return fn(err, null);
// Read succeeded: run detection on the raw contents and report the result.
fn(null, self.detect(res));
});
};
/**
 * Reads the file at `filepath` with fs.readFileSync and returns the value
 * self.detect produces for its contents.
 *
 * Nothing is caught here, so an exception thrown by fs.readFileSync
 * propagates to the caller.
 *
 * @param filepath - Path passed straight to fs.readFileSync.
 * @returns The result of self.detect for the file contents.
 */
module.exports.detectFileSync = function(filepath) {
return self.detect(fs.readFileSync(filepath));
};
};

View File

@ -1,6 +1,6 @@
{
"name": "chardet",
"version": "0.0.6",
"version": "0.0.7",
"homepage": "https://github.com/runk/node-chardet",
"description": "Character detector",
"keywords": [
@ -34,5 +34,6 @@
"readmeFilename": "README.md",
"directories": {
"test": "test"
}
},
"license" : "MIT"
}