minor changes
This commit is contained in:
parent
17efdf03be
commit
b60a93a538
|
@ -8,12 +8,12 @@ var util = require('util'),
|
||||||
|
|
||||||
module.exports.UTF_16BE = function() {
|
module.exports.UTF_16BE = function() {
|
||||||
this.name = function() {
|
this.name = function() {
|
||||||
return "UTF-16BE";
|
return 'UTF-16BE';
|
||||||
};
|
};
|
||||||
this.match = function(det) {
|
this.match = function(det) {
|
||||||
var input = det.fRawInput;
|
var input = det.fRawInput;
|
||||||
|
|
||||||
if (input.length>=2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) {
|
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFE && (input[1] & 0xFF) == 0xFF)) {
|
||||||
var confidence = 100;
|
var confidence = 100;
|
||||||
return new Match(det, this, confidence);
|
return new Match(det, this, confidence);
|
||||||
}
|
}
|
||||||
|
@ -25,14 +25,14 @@ module.exports.UTF_16BE = function() {
|
||||||
|
|
||||||
module.exports.UTF_16LE = function() {
|
module.exports.UTF_16LE = function() {
|
||||||
this.name = function() {
|
this.name = function() {
|
||||||
return "UTF-16LE";
|
return 'UTF-16LE';
|
||||||
};
|
};
|
||||||
this.match = function(det) {
|
this.match = function(det) {
|
||||||
var input = det.fRawInput;
|
var input = det.fRawInput;
|
||||||
|
|
||||||
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) {
|
if (input.length >= 2 && ((input[0] & 0xFF) == 0xFF && (input[1] & 0xFF) == 0xFE)) {
|
||||||
// An LE BOM is present.
|
// An LE BOM is present.
|
||||||
if (input.length>=4 && input[2] == 0x00 && input[3] == 0x00) {
|
if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00) {
|
||||||
// It is probably UTF-32 LE, not UTF-16
|
// It is probably UTF-32 LE, not UTF-16
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -74,9 +74,9 @@ UTF_32.prototype.match = function(det) {
|
||||||
|
|
||||||
// Cook up some sort of confidence score, based on presence of a BOM
|
// Cook up some sort of confidence score, based on presence of a BOM
|
||||||
// and the existence of valid and/or invalid multi-byte sequences.
|
// and the existence of valid and/or invalid multi-byte sequences.
|
||||||
if (hasBOM && numInvalid==0) {
|
if (hasBOM && numInvalid == 0) {
|
||||||
confidence = 100;
|
confidence = 100;
|
||||||
} else if (hasBOM && numValid > numInvalid*10) {
|
} else if (hasBOM && numValid > numInvalid * 10) {
|
||||||
confidence = 80;
|
confidence = 80;
|
||||||
} else if (numValid > 3 && numInvalid == 0) {
|
} else if (numValid > 3 && numInvalid == 0) {
|
||||||
confidence = 100;
|
confidence = 100;
|
||||||
|
@ -93,7 +93,7 @@ UTF_32.prototype.match = function(det) {
|
||||||
|
|
||||||
module.exports.UTF_32BE = function() {
|
module.exports.UTF_32BE = function() {
|
||||||
this.name = function() {
|
this.name = function() {
|
||||||
return "UTF-32BE";
|
return 'UTF-32BE';
|
||||||
};
|
};
|
||||||
this.getChar = function(input, index) {
|
this.getChar = function(input, index) {
|
||||||
return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
|
return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
|
||||||
|
@ -104,7 +104,7 @@ util.inherits(module.exports.UTF_32BE, UTF_32);
|
||||||
|
|
||||||
module.exports.UTF_32LE = function() {
|
module.exports.UTF_32LE = function() {
|
||||||
this.name = function() {
|
this.name = function() {
|
||||||
return "UTF-32LE";
|
return 'UTF-32LE';
|
||||||
};
|
};
|
||||||
this.getChar = function(input, index) {
|
this.getChar = function(input, index) {
|
||||||
return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |
|
return (input[index + 3] & 0xFF) << 24 | (input[index + 2] & 0xFF) << 16 |
|
||||||
|
|
|
@ -6,7 +6,7 @@ var Match = require ('../match');
|
||||||
*/
|
*/
|
||||||
module.exports = function() {
|
module.exports = function() {
|
||||||
this.name = function() {
|
this.name = function() {
|
||||||
return "UTF-8";
|
return 'UTF-8';
|
||||||
};
|
};
|
||||||
this.match = function(det) {
|
this.match = function(det) {
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ module.exports = function() {
|
||||||
// Verify that we've got the right number of trail bytes in the sequence
|
// Verify that we've got the right number of trail bytes in the sequence
|
||||||
for (;;) {
|
for (;;) {
|
||||||
i++;
|
i++;
|
||||||
if (i>=det.fRawLength) {
|
if (i >= det.fRawLength) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
b = input[i];
|
b = input[i];
|
||||||
|
@ -66,9 +66,9 @@ module.exports = function() {
|
||||||
// Cook up some sort of confidence score, based on presence of a BOM
|
// Cook up some sort of confidence score, based on presence of a BOM
|
||||||
// and the existence of valid and/or invalid multi-byte sequences.
|
// and the existence of valid and/or invalid multi-byte sequences.
|
||||||
confidence = 0;
|
confidence = 0;
|
||||||
if (hasBOM && numInvalid==0) {
|
if (hasBOM && numInvalid == 0) {
|
||||||
confidence = 100;
|
confidence = 100;
|
||||||
} else if (hasBOM && numValid > numInvalid*10) {
|
} else if (hasBOM && numValid > numInvalid * 10) {
|
||||||
confidence = 80;
|
confidence = 80;
|
||||||
} else if (numValid > 3 && numInvalid == 0) {
|
} else if (numValid > 3 && numInvalid == 0) {
|
||||||
confidence = 100;
|
confidence = 100;
|
||||||
|
@ -77,7 +77,7 @@ module.exports = function() {
|
||||||
} else if (numValid == 0 && numInvalid == 0) {
|
} else if (numValid == 0 && numInvalid == 0) {
|
||||||
// Plain ASCII.
|
// Plain ASCII.
|
||||||
confidence = 10;
|
confidence = 10;
|
||||||
} else if (numValid > numInvalid*10) {
|
} else if (numValid > numInvalid * 10) {
|
||||||
// Probably corrupt UTF-8 data. Valid sequences aren't likely by chance.
|
// Probably corrupt UTF-8 data. Valid sequences aren't likely by chance.
|
||||||
confidence = 25;
|
confidence = 25;
|
||||||
}
|
}
|
||||||
|
|
15
index.js
15
index.js
|
@ -62,27 +62,24 @@ module.exports.detect = function(buffer) {
|
||||||
fInputLen: buffer.length
|
fInputLen: buffer.length
|
||||||
};
|
};
|
||||||
|
|
||||||
var matches = recognisers.map(function(rec) {
|
var match = recognisers.map(function(rec) {
|
||||||
return rec.match(context);
|
return rec.match(context);
|
||||||
}).filter(function(match) {
|
}).filter(function(match) {
|
||||||
return !!match;
|
return !!match;
|
||||||
});
|
}).sort(function(a, b) {
|
||||||
|
|
||||||
matches.sort(function(a, b) {
|
|
||||||
return a.confidence - b.confidence;
|
return a.confidence - b.confidence;
|
||||||
});
|
}).pop();
|
||||||
|
|
||||||
return matches.length ? matches.pop().name : null;
|
return match ? match.name : null;
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports.detectFile = function(filepath, fn) {
|
module.exports.detectFile = function(filepath, fn) {
|
||||||
fs.readFile(filepath, function(err, res) {
|
fs.readFile(filepath, function(err, res) {
|
||||||
if (err)
|
if (err) return fn(err, null);
|
||||||
return fn(err, null);
|
|
||||||
fn(null, self.detect(res));
|
fn(null, self.detect(res));
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports.detectFileSync = function(filepath) {
|
module.exports.detectFileSync = function(filepath) {
|
||||||
return self.detect(fs.readFileSync(filepath));
|
return self.detect(fs.readFileSync(filepath));
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "chardet",
|
"name": "chardet",
|
||||||
"version": "0.0.6",
|
"version": "0.0.7",
|
||||||
"homepage": "https://github.com/runk/node-chardet",
|
"homepage": "https://github.com/runk/node-chardet",
|
||||||
"description": "Character detector",
|
"description": "Character detector",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
|
@ -34,5 +34,6 @@
|
||||||
"readmeFilename": "README.md",
|
"readmeFilename": "README.md",
|
||||||
"directories": {
|
"directories": {
|
||||||
"test": "test"
|
"test": "test"
|
||||||
}
|
},
|
||||||
|
"license" : "MIT"
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue