Merge pull request #16 from zevanty/use-wrapper-functions
Use wrapper functions for returning all matches
This commit is contained in:
commit
27c2359cb2
27
README.md
27
README.md
|
@ -14,15 +14,30 @@ npm i chardet
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
To return the encoding with the highest confidence:
|
||||||
```javascript
|
```javascript
|
||||||
var chardet = require('chardet');
|
var chardet = require('chardet');
|
||||||
chardet.detect(new Buffer('hello there!'));
|
chardet.detect(Buffer.from('hello there!'));
|
||||||
// or
|
// or
|
||||||
chardet.detectFile('/path/to/file', function(err, encoding) {});
|
chardet.detectFile('/path/to/file', function(err, encoding) {});
|
||||||
// or
|
// or
|
||||||
chardet.detectFileSync('/path/to/file');
|
chardet.detectFileSync('/path/to/file');
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
To return the full list of possible encodings:
|
||||||
|
```javascript
|
||||||
|
var chardet = require('chardet');
|
||||||
|
chardet.detectAll(Buffer.from('hello there!'));
|
||||||
|
// or
|
||||||
|
chardet.detectFileAll('/path/to/file', function(err, encodings) {});
|
||||||
|
// or
|
||||||
|
chardet.detectFileAllSync('/path/to/file');
|
||||||
|
|
||||||
|
// Returned value is an array of objects sorted by confidence value in descending order
|
||||||
|
//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}]
|
||||||
|
```
|
||||||
|
|
||||||
## Working with large data sets
|
## Working with large data sets
|
||||||
|
|
||||||
Sometimes, when the data set is huge and you want to optimize performance (at the cost of some accuracy),
|
Sometimes, when the data set is huge and you want to optimize performance (at the cost of some accuracy),
|
||||||
|
@ -32,16 +47,6 @@ you can sample only first N bytes of the buffer:
|
||||||
chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {});
|
chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {});
|
||||||
```
|
```
|
||||||
|
|
||||||
## Returning more detailed results
|
|
||||||
|
|
||||||
If you wish to see the full list of possible encodings:
|
|
||||||
```javascript
|
|
||||||
chardet.detectFile('/path/to/file', { returnAllMatches: true }, function(err, encodings) {
|
|
||||||
//encodings is an array of objects sorted by confidence value in descending order
|
|
||||||
//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}]
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
## Supported Encodings:
|
## Supported Encodings:
|
||||||
|
|
||||||
* UTF-8
|
* UTF-8
|
||||||
|
|
29
index.js
29
index.js
|
@ -120,3 +120,32 @@ module.exports.detectFileSync = function(filepath, opts) {
|
||||||
|
|
||||||
return self.detect(fs.readFileSync(filepath), opts);
|
return self.detect(fs.readFileSync(filepath), opts);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Wrappers for the previous functions to return all encodings
|
||||||
|
/**
 * Wrapper around `detect` that always asks for every candidate encoding.
 *
 * Forwards to `self.detect` with `returnAllMatches: true` merged into the
 * supplied options.
 *
 * @param {Buffer} buffer - Data to analyse; passed to `self.detect` unchanged.
 * @param {Object} [opts] - Options forwarded to `self.detect`. Anything that
 *   is not a non-null object is treated as "no options".
 * @returns {*} Whatever `self.detect` returns when `returnAllMatches` is set
 *   (per the README, an array of matches sorted by confidence).
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null must be excluded explicitly; otherwise
  // setting a property on it would throw.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified: a reused
  // object must not carry `returnAllMatches` into later plain detect() calls.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detect(buffer, allOpts);
};
|
||||||
|
|
||||||
|
/**
 * Wrapper around `detectFile` that always asks for every candidate encoding.
 *
 * Accepts either `(filepath, cb)` or `(filepath, opts, cb)` and forwards to
 * `self.detectFile` with `returnAllMatches: true` merged into the options.
 *
 * @param {string} filepath - Path handed to `self.detectFile`.
 * @param {Object|Function} [opts] - Options object, or the callback when no
 *   options are given. Anything that is not a non-null object is treated as
 *   "no options".
 * @param {Function} [cb] - Callback handed to `self.detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  // Support the two-argument form: detectFileAll(filepath, cb).
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }
  // `typeof null` is 'object', so null must be excluded explicitly; otherwise
  // setting a property on it would throw.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  self.detectFile(filepath, allOpts, cb);
};
|
||||||
|
|
||||||
|
/**
 * Wrapper around `detectFileSync` that always asks for every candidate
 * encoding.
 *
 * Forwards to `self.detectFileSync` with `returnAllMatches: true` merged into
 * the supplied options.
 *
 * @param {string} filepath - Path handed to `self.detectFileSync`.
 * @param {Object} [opts] - Options forwarded to `self.detectFileSync`.
 *   Anything that is not a non-null object is treated as "no options".
 * @returns {*} Whatever `self.detectFileSync` returns when `returnAllMatches`
 *   is set (per the README, an array of matches sorted by confidence).
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null must be excluded explicitly; otherwise
  // setting a property on it would throw.
  var base = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified.
  var allOpts = Object.assign({}, base, { returnAllMatches: true });
  return self.detectFileSync(filepath, allOpts);
};
|
||||||
|
|
|
@ -21,11 +21,6 @@ describe('chardet', function() {
|
||||||
it('should detect encoding', function() {
|
it('should detect encoding', function() {
|
||||||
assert.equal(chardet.detect(fs.readFileSync(path)), 'UTF-8');
|
assert.equal(chardet.detect(fs.readFileSync(path)), 'UTF-8');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return a list of encodings, sorted by confidence level in decending order', function() {
|
|
||||||
var matches = chardet.detect(fs.readFileSync(path), { returnAllMatches: true });
|
|
||||||
assert.deepEqual(matches, expectedEncodingsFromPath);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('#detectFile', function() {
|
describe('#detectFile', function() {
|
||||||
|
@ -44,9 +39,28 @@ describe('chardet', function() {
|
||||||
done();
|
done();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('#detectFileSync', function() {
|
||||||
|
it('should detect encoding', function() {
|
||||||
|
assert.equal(chardet.detectFileSync(path), 'UTF-8');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should detect encoding with smaller sample size', function() {
|
||||||
|
assert.equal(chardet.detectFileSync(path, { sampleSize: 32 }), 'UTF-8');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('#detectAll', function() {
|
||||||
|
it('should return a list of encodings, sorted by confidence level in decending order', function() {
|
||||||
|
var matches = chardet.detectAll(fs.readFileSync(path));
|
||||||
|
assert.deepEqual(matches, expectedEncodingsFromPath);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('#detectFileAll', function() {
|
||||||
it('should return a list of encodings, sorted by confidence level in decending order', function(done) {
|
it('should return a list of encodings, sorted by confidence level in decending order', function(done) {
|
||||||
chardet.detectFile(path, { returnAllMatches: true }, function(err, res) {
|
chardet.detectFileAll(path, function(err, res) {
|
||||||
assert.equal(err, null);
|
assert.equal(err, null);
|
||||||
assert.deepEqual(res, expectedEncodingsFromPath);
|
assert.deepEqual(res, expectedEncodingsFromPath);
|
||||||
done();
|
done();
|
||||||
|
@ -54,7 +68,7 @@ describe('chardet', function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) {
|
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function(done) {
|
||||||
chardet.detectFile(path, { sampleSize: 32, returnAllMatches: true }, function(err, res) {
|
chardet.detectFileAll(path, { sampleSize: 32 }, function(err, res) {
|
||||||
assert.equal(err, null);
|
assert.equal(err, null);
|
||||||
assert.deepEqual(res, [
|
assert.deepEqual(res, [
|
||||||
{ confidence: 100, name: 'UTF-8', lang: undefined },
|
{ confidence: 100, name: 'UTF-8', lang: undefined },
|
||||||
|
@ -69,22 +83,14 @@ describe('chardet', function() {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('#detectFileSync', function() {
|
describe('#detectFileAllSync', function() {
|
||||||
it('should detect encoding', function() {
|
|
||||||
assert.equal(chardet.detectFileSync(path), 'UTF-8');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should detect encoding with smaller sample size', function() {
|
|
||||||
assert.equal(chardet.detectFileSync(path, { sampleSize: 32 }), 'UTF-8');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return a list of encodings, sorted by confidence level in decending order', function() {
|
it('should return a list of encodings, sorted by confidence level in decending order', function() {
|
||||||
var matches = chardet.detectFileSync(path, { returnAllMatches: true });
|
var matches = chardet.detectFileAllSync(path);
|
||||||
assert.deepEqual(matches, expectedEncodingsFromPath);
|
assert.deepEqual(matches, expectedEncodingsFromPath);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() {
|
it('should return a list of encodings even with smaller sample size, sorted by confidence level in decending order', function() {
|
||||||
var matches = chardet.detectFileSync(path, { sampleSize: 32, returnAllMatches: true });
|
var matches = chardet.detectFileAllSync(path, { sampleSize: 32 });
|
||||||
assert.deepEqual(matches, [
|
assert.deepEqual(matches, [
|
||||||
{'confidence': 100, 'name': 'UTF-8', 'lang': undefined},
|
{'confidence': 100, 'name': 'UTF-8', 'lang': undefined},
|
||||||
{'confidence': 10, 'name': 'Shift-JIS', 'lang': undefined},
|
{'confidence': 10, 'name': 'Shift-JIS', 'lang': undefined},
|
||||||
|
|
Loading…
Reference in New Issue