commit
20f996734e
|
@ -23,3 +23,4 @@ jobs:
|
|||
- run: npm i
|
||||
- run: npm test
|
||||
- run: npm run build
|
||||
- run: .github/workflows/test-build.sh
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
// Smoke test for the package as consumed through CommonJS `require`.
// The module is resolved from the current working directory, so this script
// must be started from the package root.
const assert = require('assert');

const chardet = require(process.cwd());

// `assert(value, message)` only tests truthiness, and `typeof` always yields a
// non-empty string, so a plain `assert(typeof x, 'function')` can never fail.
// Compare the type explicitly instead.
assert.strictEqual(typeof chardet.analyse, 'function');
assert.strictEqual(typeof chardet.detect, 'function');
assert.strictEqual(typeof chardet.detectFile, 'function');
assert.strictEqual(typeof chardet.detectFileSync, 'function');

// The full, ordered analysis result for a short ASCII sample.
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
  { confidence: 98, name: 'ISO-8859-1', lang: 'en' },
  { confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
  { confidence: 10, name: 'UTF-8', lang: undefined },
  { confidence: 10, name: 'Shift_JIS', lang: 'ja' },
  { confidence: 10, name: 'Big5', lang: 'zh' },
  { confidence: 10, name: 'EUC-JP', lang: 'ja' },
  { confidence: 10, name: 'EUC-KR', lang: 'ko' },
  { confidence: 10, name: 'GB18030', lang: 'zh' },
]);

console.log(' > test-build.js OK');
|
|
@ -0,0 +1,6 @@
|
|||
#!/bin/sh -ex

# Runs the build smoke tests from both a CommonJS (node) and a TypeScript
# (ts-node) consumer. All paths are relative, so run this from the repository
# root.

# Make locally installed CLIs such as ts-node resolvable. `npm bin` was removed
# in npm 9, and its failure inside `export` would go unnoticed, so point at
# node_modules/.bin directly. The value is quoted to survive paths with spaces.
export PATH="$PATH:$PWD/node_modules/.bin"

node ./.github/workflows/test-build.js
ts-node ./.github/workflows/test-build.ts
|
|
@ -0,0 +1,28 @@
|
|||
import assert from 'assert';

/**
 * Smoke test for the package as consumed through a dynamic `import()`.
 * The module is resolved from the current working directory, so this script
 * must be started from the package root.
 */
const main = async () => {
  const chardet = await import(process.cwd());

  // `assert(value, message)` only tests truthiness, and `typeof` always yields
  // a non-empty string, so a plain `assert(typeof x, 'function')` can never
  // fail. Compare the type explicitly instead.
  assert.strictEqual(typeof chardet.analyse, 'function');
  assert.strictEqual(typeof chardet.detect, 'function');
  assert.strictEqual(typeof chardet.detectFile, 'function');
  assert.strictEqual(typeof chardet.detectFileSync, 'function');

  // The full, ordered analysis result for a short ASCII sample.
  assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
    { confidence: 98, name: 'ISO-8859-1', lang: 'en' },
    { confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
    { confidence: 10, name: 'UTF-8', lang: undefined },
    { confidence: 10, name: 'Shift_JIS', lang: 'ja' },
    { confidence: 10, name: 'Big5', lang: 'zh' },
    { confidence: 10, name: 'EUC-JP', lang: 'ja' },
    { confidence: 10, name: 'EUC-KR', lang: 'ko' },
    { confidence: 10, name: 'GB18030', lang: 'zh' },
  ]);
};

main()
  .then(() => console.log(' > test-build.ts OK'))
  .catch((err) => {
    // Print the failure and exit non-zero so the calling script stops.
    console.error(err);
    process.exit(1);
  });
|
12
README.md
12
README.md
|
@ -21,19 +21,19 @@ npm i chardet
|
|||
To return the encoding with the highest confidence:
|
||||
|
||||
```javascript
|
||||
const chardet = require('chardet');
|
||||
import chardet from 'chardet';
|
||||
|
||||
chardet.detect(Buffer.from('hello there!'));
|
||||
const encoding = chardet.detect(Buffer.from('hello there!'));
|
||||
// or
|
||||
chardet.detectFile('/path/to/file').then(encoding => console.log(encoding));
|
||||
const encoding = await chardet.detectFile('/path/to/file');
|
||||
// or
|
||||
chardet.detectFileSync('/path/to/file');
|
||||
const encoding = chardet.detectFileSync('/path/to/file');
|
||||
```
|
||||
|
||||
To return the full list of possible encodings use `analyse` method.
|
||||
|
||||
```javascript
|
||||
const chardet = require('chardet');
|
||||
import chardet from 'chardet';
|
||||
chardet.analyse(Buffer.from('hello there!'));
|
||||
```
|
||||
|
||||
|
@ -48,7 +48,7 @@ Returned value is an array of objects sorted by confidence value in descending or
|
|||
|
||||
## Working with large data sets
|
||||
|
||||
Sometimes, when data set is huge and you want to optimize performace (in tradeoff of less accuracy),
|
||||
Sometimes, when data set is huge and you want to optimize performance (with a tradeoff of less accuracy),
|
||||
you can sample only first N bytes of the buffer:
|
||||
|
||||
```javascript
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
"prettier": "^2.1.2",
|
||||
"semantic-release": "^17.1.2",
|
||||
"ts-jest": "^26.4.0",
|
||||
"ts-node": "^10.9.1",
|
||||
"typescript": "^4.8.4"
|
||||
},
|
||||
"keywords": [
|
||||
|
|
Loading…
Reference in New Issue