2013-04-30 13:49:02 +00:00
|
|
|
var assert = require('assert'),
|
2013-11-22 04:40:19 +00:00
|
|
|
chardet = require('../'),
|
|
|
|
fs = require('fs');
|
2013-04-30 13:49:02 +00:00
|
|
|
|
|
|
|
// Suite for the public chardet entry points: detect (in-memory data),
// detectFile (callback-based) and detectFileSync. Every case runs against the
// same fixture file and checks either the single detected encoding name or,
// with returnAllMatches, the complete ranked list of candidates.
describe('chardet', function() {
  // Fixture file that every case below feeds to the detector.
  var fixturePath = __dirname + '/data/encodings/utf8';

  // Candidate list expected with returnAllMatches when no sampleSize is given.
  var fullSampleMatches = [
    { confidence: 100, name: 'UTF-8', lang: undefined },
    { confidence: 32, name: 'windows-1252', lang: 'fr' },
    { confidence: 19, name: 'KOI8-R', lang: undefined },
    { confidence: 10, name: 'Big5', lang: undefined },
    { confidence: 10, name: 'GB18030', lang: undefined },
    { confidence: 10, name: 'windows-1253', lang: undefined },
    { confidence: 6, name: 'windows-1250', lang: 'pl' },
    { confidence: 4, name: 'windows-1254', lang: undefined },
    { confidence: 2, name: 'windows-1251', lang: undefined }
  ];

  // Candidate list expected with returnAllMatches and sampleSize: 32.
  var smallSampleMatches = [
    { confidence: 100, name: 'UTF-8', lang: undefined },
    { confidence: 10, name: 'Shift-JIS', lang: undefined },
    { confidence: 10, name: 'windows-1252', lang: 'it' },
    { confidence: 10, name: 'windows-1250', lang: 'hu' },
    { confidence: 10, name: 'windows-1253', lang: undefined },
    { confidence: 10, name: 'windows-1251', lang: undefined }
  ];

  // Asserts that a detection result is the single encoding name 'UTF-8'.
  function expectUtf8(result) {
    assert.equal(result, 'UTF-8');
  }

  // Builds a node-style callback for detectFile: it first asserts that the
  // error argument equals null, then runs `verify` on the result, and only
  // then signals completion of the async test through `done`.
  function finishWith(done, verify) {
    return function(error, result) {
      assert.equal(error, null);
      verify(result);
      done();
    };
  }

  describe('#detect', function() {
    it('should detect encoding', function() {
      var contents = fs.readFileSync(fixturePath);
      expectUtf8(chardet.detect(contents));
    });

    it('should return a list of encodings, sorted by confidence level in decending order', function() {
      var contents = fs.readFileSync(fixturePath);
      var detected = chardet.detect(contents, { returnAllMatches: true });
      assert.deepEqual(detected, fullSampleMatches);
    });
  });

  describe('#detectFile', function() {
    it('should detect encoding', function(done) {
      chardet.detectFile(fixturePath, finishWith(done, expectUtf8));
    });

    it('should detect encoding with smaller sample size', function(done) {
      var options = { sampleSize: 32 };
      chardet.detectFile(fixturePath, options, finishWith(done, expectUtf8));
    });

    it('should return a list of encodings, sorted by confidence level in decending order', function(done) {
      var options = { returnAllMatches: true };
      chardet.detectFile(fixturePath, options, finishWith(done, function(detected) {
        assert.deepEqual(detected, fullSampleMatches);
      }));
    });

    it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) {
      var options = { sampleSize: 32, returnAllMatches: true };
      chardet.detectFile(fixturePath, options, finishWith(done, function(detected) {
        assert.deepEqual(detected, smallSampleMatches);
      }));
    });
  });

  describe('#detectFileSync', function() {
    it('should detect encoding', function() {
      expectUtf8(chardet.detectFileSync(fixturePath));
    });

    it('should detect encoding with smaller sample size', function() {
      expectUtf8(chardet.detectFileSync(fixturePath, { sampleSize: 32 }));
    });

    it('should return a list of encodings, sorted by confidence level in decending order', function() {
      var detected = chardet.detectFileSync(fixturePath, { returnAllMatches: true });
      assert.deepEqual(detected, fullSampleMatches);
    });

    it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() {
      var options = { sampleSize: 32, returnAllMatches: true };
      var detected = chardet.detectFileSync(fixturePath, options);
      assert.deepEqual(detected, smallSampleMatches);
    });
  });
});
|