diff --git a/README.md b/README.md index a975c84..5c35cb5 100644 --- a/README.md +++ b/README.md @@ -14,15 +14,30 @@ npm i chardet ## Usage +To return the encoding with the highest confidence: ```javascript var chardet = require('chardet'); -chardet.detect(new Buffer('hello there!')); +chardet.detect(Buffer.from('hello there!')); // or chardet.detectFile('/path/to/file', function(err, encoding) {}); // or chardet.detectFileSync('/path/to/file'); ``` + +To return the full list of possible encodings: +```javascript +var chardet = require('chardet'); +chardet.detectAll(Buffer.from('hello there!')); +// or +chardet.detectFileAll('/path/to/file', function(err, encoding) {}); +// or +chardet.detectFileAllSync('/path/to/file'); + +//Returned value is an array of objects sorted by confidence value in descending order +//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}] +``` + ## Working with large data sets Sometimes, when data set is huge and you want to optimize performace (in tradeoff of less accuracy), @@ -32,16 +47,6 @@ you can sample only first N bytes of the buffer: chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {}); ``` -## Returning more detailed results - -If you wish to see the full list of possible encodings: -```javascript -chardet.detectFile('/path/to/file', { returnAllMatches: true }, function(err, encodings) { - //encodings is an array of objects sorted by confidence value in decending order - //e.g. 
[{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}] -}); -``` - ## Supported Encodings: * UTF-8 diff --git a/index.js b/index.js index f75b4ba..7f4e6ce 100644 --- a/index.js +++ b/index.js @@ -120,3 +120,32 @@ module.exports.detectFileSync = function(filepath, opts) { return self.detect(fs.readFileSync(filepath), opts); }; + +// Wrappers for the previous functions to return all encodings. +// Each one forwards a copy of opts with returnAllMatches set to true, so the +// caller's options object is never modified. +function withAllMatches(opts) { + // typeof null is 'object', so null has to be excluded explicitly + var base = (opts !== null && typeof opts === 'object') ? opts : {}; + return Object.assign({}, base, { returnAllMatches: true }); +} + +// Like detect(buffer, opts), with returnAllMatches forced on. +module.exports.detectAll = function(buffer, opts) { + return self.detect(buffer, withAllMatches(opts)); +}; + +// Like detectFile(filepath, opts, cb), with returnAllMatches forced on. +// opts may be omitted, in which case the second argument is the callback. +module.exports.detectFileAll = function(filepath, opts, cb) { + if (typeof opts === 'function') { + cb = opts; + opts = undefined; + } + self.detectFile(filepath, withAllMatches(opts), cb); +}; + +// Like detectFileSync(filepath, opts), with returnAllMatches forced on. +module.exports.detectFileAllSync = function(filepath, opts) { + return self.detectFileSync(filepath, withAllMatches(opts)); +}; diff --git a/test/chardet.js b/test/chardet.js index d089c19..ac6575f 100644 --- a/test/chardet.js +++ b/test/chardet.js @@ -21,11 +21,6 @@ describe('chardet', function() { it('should detect encoding', function() { assert.equal(chardet.detect(fs.readFileSync(path)), 'UTF-8'); }); - - it('should return a list of encodings, sorted by confidence level in decending order', function() { - var matches = chardet.detect(fs.readFileSync(path), { returnAllMatches: true }); - assert.deepEqual(matches, expectedEncodingsFromPath); - }); }); describe('#detectFile', function() { @@ -44,9 +39,28 @@ describe('chardet', function() { done(); }); }); + }); + describe('#detectFileSync', function() { + it('should detect encoding', function() { + assert.equal(chardet.detectFileSync(path), 'UTF-8'); + }); + + it('should detect encoding with smaller sample size', function() { + assert.equal(chardet.detectFileSync(path, { sampleSize: 32 }), 'UTF-8'); 
+ }); + }); + + describe('#detectAll', function() { + it('should return a list of encodings, sorted by confidence level in decending order', function() { + var matches = chardet.detectAll(fs.readFileSync(path)); + assert.deepEqual(matches, expectedEncodingsFromPath); + }); + }); + + describe('#detectFileAll', function() { it('should return a list of encodings, sorted by confidence level in decending order', function(done) { - chardet.detectFile(path, { returnAllMatches: true }, function(err, res) { + chardet.detectFileAll(path, function(err, res) { assert.equal(err, null); assert.deepEqual(res, expectedEncodingsFromPath); done(); @@ -54,7 +68,7 @@ describe('chardet', function() { }); it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) { - chardet.detectFile(path, { sampleSize: 32, returnAllMatches: true }, function(err, res) { + chardet.detectFileAll(path, { sampleSize: 32 }, function(err, res) { assert.equal(err, null); assert.deepEqual(res, [ { confidence: 100, name: 'UTF-8', lang: undefined }, @@ -69,22 +83,14 @@ describe('chardet', function() { }); }); - describe('#detectFileSync', function() { - it('should detect encoding', function() { - assert.equal(chardet.detectFileSync(path), 'UTF-8'); - }); - - it('should detect encoding with smaller sample size', function() { - assert.equal(chardet.detectFileSync(path, { sampleSize: 32 }), 'UTF-8'); - }); - + describe('#detectFileAllSync', function() { it('should return a list of encodings, sorted by confidence level in decending order', function() { - var matches = chardet.detectFileSync(path, { returnAllMatches: true }); + var matches = chardet.detectFileAllSync(path); assert.deepEqual(matches, expectedEncodingsFromPath); }); it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() { - var matches = chardet.detectFileSync(path, { sampleSize: 32, returnAllMatches: true 
}); + var matches = chardet.detectFileAllSync(path, { sampleSize: 32 }); assert.deepEqual(matches, [ {'confidence': 100, 'name': 'UTF-8', 'lang': undefined}, {'confidence': 10, 'name': 'Shift-JIS', 'lang': undefined},