Merge pull request #16 from zevanty/use-wrapper-functions

Use wrapper functions for returning all matches
This commit is contained in:
Dmitry Shirokov 2018-07-02 16:00:45 +10:00 committed by GitHub
commit 27c2359cb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 29 deletions

View File

@ -14,15 +14,30 @@ npm i chardet
## Usage ## Usage
To return the encoding with the highest confidence:
```javascript ```javascript
var chardet = require('chardet'); var chardet = require('chardet');
chardet.detect(new Buffer('hello there!')); chardet.detect(Buffer.from('hello there!'));
// or // or
chardet.detectFile('/path/to/file', function(err, encoding) {}); chardet.detectFile('/path/to/file', function(err, encoding) {});
// or // or
chardet.detectFileSync('/path/to/file'); chardet.detectFileSync('/path/to/file');
``` ```
To return the full list of possible encodings:
```javascript
var chardet = require('chardet');
chardet.detectAll(Buffer.from('hello there!'));
// or
chardet.detectFileAll('/path/to/file', function(err, encoding) {});
// or
chardet.detectFileAllSync('/path/to/file');
//Returned value is an array of objects sorted by confidence value in descending order
//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}]
```
## Working with large data sets ## Working with large data sets
Sometimes, when the data set is huge and you want to optimize performance (at the cost of some accuracy), Sometimes, when the data set is huge and you want to optimize performance (at the cost of some accuracy),
@ -32,16 +47,6 @@ you can sample only first N bytes of the buffer:
chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {}); chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {});
``` ```
## Returning more detailed results
If you wish to see the full list of possible encodings:
```javascript
chardet.detectFile('/path/to/file', { returnAllMatches: true }, function(err, encodings) {
//encodings is an array of objects sorted by confidence value in decending order
//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}]
});
```
## Supported Encodings: ## Supported Encodings:
* UTF-8 * UTF-8

View File

@ -120,3 +120,32 @@ module.exports.detectFileSync = function(filepath, opts) {
return self.detect(fs.readFileSync(filepath), opts); return self.detect(fs.readFileSync(filepath), opts);
}; };
// Wrappers for the previous functions to return all encodings
/**
 * Wrapper around `detect` that always asks for every match instead of only
 * the best one.
 *
 * @param {Buffer} buffer - Data handed to `detect` unchanged.
 * @param {Object} [opts] - Extra options forwarded to `detect`. Anything that
 *   is not a non-null object is treated as "no options". The caller's object
 *   is copied, never modified.
 * @returns {*} Whatever `detect` returns when `returnAllMatches` is true.
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly;
  // otherwise setting a property on it would throw.
  var options = (opts !== null && typeof opts === 'object')
    ? Object.assign({}, opts) // shallow copy keeps the caller's object intact
    : {};
  options.returnAllMatches = true;
  return self.detect(buffer, options);
};
/**
 * Wrapper around `detectFile` that always asks for every match instead of
 * only the best one.
 *
 * @param {string} filepath - Path handed to `detectFile` unchanged.
 * @param {Object|Function} [opts] - Extra options forwarded to `detectFile`,
 *   or the callback itself when no options are given. Anything that is not a
 *   non-null object is treated as "no options". The caller's object is
 *   copied, never modified.
 * @param {Function} [cb] - Callback forwarded to `detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  // Support the two-argument form: detectFileAll(filepath, cb).
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }
  // `typeof null` is 'object', so null has to be excluded explicitly;
  // otherwise setting a property on it would throw.
  var options = (opts !== null && typeof opts === 'object')
    ? Object.assign({}, opts) // shallow copy keeps the caller's object intact
    : {};
  options.returnAllMatches = true;
  self.detectFile(filepath, options, cb);
};
/**
 * Wrapper around `detectFileSync` that always asks for every match instead
 * of only the best one.
 *
 * @param {string} filepath - Path handed to `detectFileSync` unchanged.
 * @param {Object} [opts] - Extra options forwarded to `detectFileSync`.
 *   Anything that is not a non-null object is treated as "no options". The
 *   caller's object is copied, never modified.
 * @returns {*} Whatever `detectFileSync` returns when `returnAllMatches` is
 *   true.
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly;
  // otherwise setting a property on it would throw.
  var options = (opts !== null && typeof opts === 'object')
    ? Object.assign({}, opts) // shallow copy keeps the caller's object intact
    : {};
  options.returnAllMatches = true;
  return self.detectFileSync(filepath, options);
};

View File

@ -21,11 +21,6 @@ describe('chardet', function() {
it('should detect encoding', function() { it('should detect encoding', function() {
assert.equal(chardet.detect(fs.readFileSync(path)), 'UTF-8'); assert.equal(chardet.detect(fs.readFileSync(path)), 'UTF-8');
}); });
it('should return a list of encodings, sorted by confidence level in decending order', function() {
var matches = chardet.detect(fs.readFileSync(path), { returnAllMatches: true });
assert.deepEqual(matches, expectedEncodingsFromPath);
});
}); });
describe('#detectFile', function() { describe('#detectFile', function() {
@ -44,9 +39,28 @@ describe('chardet', function() {
done(); done();
}); });
}); });
});
describe('#detectFileSync', function() {
  // No options: only the single best encoding name is expected back.
  it('should detect encoding', function() {
    var detected = chardet.detectFileSync(path);
    assert.equal(detected, 'UTF-8');
  });

  // Same expectation when a sampleSize option is supplied.
  it('should detect encoding with smaller sample size', function() {
    var sampled = chardet.detectFileSync(path, { sampleSize: 32 });
    assert.equal(sampled, 'UTF-8');
  });
});
describe('#detectAll', function() {
  // detectAll on the fixture's bytes must equal the shared expected list.
  it('should return a list of encodings, sorted by confidence level in decending order', function() {
    assert.deepEqual(
      chardet.detectAll(fs.readFileSync(path)),
      expectedEncodingsFromPath
    );
  });
});
describe('#detectFileAll', function() {
it('should return a list of encodings, sorted by confidence level in decending order', function(done) { it('should return a list of encodings, sorted by confidence level in decending order', function(done) {
chardet.detectFile(path, { returnAllMatches: true }, function(err, res) { chardet.detectFileAll(path, function(err, res) {
assert.equal(err, null); assert.equal(err, null);
assert.deepEqual(res, expectedEncodingsFromPath); assert.deepEqual(res, expectedEncodingsFromPath);
done(); done();
@ -54,7 +68,7 @@ describe('chardet', function() {
}); });
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) { it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) {
chardet.detectFile(path, { sampleSize: 32, returnAllMatches: true }, function(err, res) { chardet.detectFileAll(path, { sampleSize: 32 }, function(err, res) {
assert.equal(err, null); assert.equal(err, null);
assert.deepEqual(res, [ assert.deepEqual(res, [
{ confidence: 100, name: 'UTF-8', lang: undefined }, { confidence: 100, name: 'UTF-8', lang: undefined },
@ -69,22 +83,14 @@ describe('chardet', function() {
}); });
}); });
describe('#detectFileSync', function() { describe('#detectFileAllSync', function() {
it('should detect encoding', function() {
assert.equal(chardet.detectFileSync(path), 'UTF-8');
});
it('should detect encoding with smaller sample size', function() {
assert.equal(chardet.detectFileSync(path, { sampleSize: 32 }), 'UTF-8');
});
it('should return a list of encodings, sorted by confidence level in decending order', function() { it('should return a list of encodings, sorted by confidence level in decending order', function() {
var matches = chardet.detectFileSync(path, { returnAllMatches: true }); var matches = chardet.detectFileAllSync(path);
assert.deepEqual(matches, expectedEncodingsFromPath); assert.deepEqual(matches, expectedEncodingsFromPath);
}); });
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() { it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() {
var matches = chardet.detectFileSync(path, { sampleSize: 32, returnAllMatches: true }); var matches = chardet.detectFileAllSync(path, { sampleSize: 32 });
assert.deepEqual(matches, [ assert.deepEqual(matches, [
{'confidence': 100, 'name': 'UTF-8', 'lang': undefined}, {'confidence': 100, 'name': 'UTF-8', 'lang': undefined},
{'confidence': 10, 'name': 'Shift-JIS', 'lang': undefined}, {'confidence': 10, 'name': 'Shift-JIS', 'lang': undefined},