commit
20f996734e
|
@ -23,3 +23,4 @@ jobs:
|
||||||
- run: npm i
|
- run: npm i
|
||||||
- run: npm test
|
- run: npm test
|
||||||
- run: npm run build
|
- run: npm run build
|
||||||
|
- run: .github/workflows/test-build.sh
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
const assert = require('assert');
|
||||||
|
|
||||||
|
const chardet = require(process.cwd());
|
||||||
|
|
||||||
|
assert.strictEqual(typeof chardet.analyse, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detect, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detectFile, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detectFileSync, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
|
||||||
|
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
||||||
|
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
||||||
|
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
||||||
|
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
||||||
|
{ confidence: 10, name: 'Shift_JIS', lang: 'ja' },
|
||||||
|
{ confidence: 10, name: 'Big5', lang: 'zh' },
|
||||||
|
{ confidence: 10, name: 'EUC-JP', lang: 'ja' },
|
||||||
|
{ confidence: 10, name: 'EUC-KR', lang: 'ko' },
|
||||||
|
{ confidence: 10, name: 'GB18030', lang: 'zh' },
|
||||||
|
]);
|
||||||
|
|
||||||
|
console.log(' > test-build.js OK');
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/sh -ex
|
||||||
|
|
||||||
|
# Quote the expansion so a project path containing spaces stays a single PATH entry.
export PATH="$PATH:$(npm bin)"
|
||||||
|
|
||||||
|
node ./.github/workflows/test-build.js
|
||||||
|
ts-node ./.github/workflows/test-build.ts
|
|
@ -0,0 +1,28 @@
|
||||||
|
import assert from 'assert';
|
||||||
|
|
||||||
|
const main = async () => {
|
||||||
|
const chardet = await import(process.cwd());
|
||||||
|
|
||||||
|
assert.strictEqual(typeof chardet.analyse, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detect, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detectFile, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
assert.strictEqual(typeof chardet.detectFileSync, 'function'); // compare the type; a bare assert() on a typeof string is always truthy
|
||||||
|
|
||||||
|
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
||||||
|
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
||||||
|
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
||||||
|
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
||||||
|
{ confidence: 10, name: 'Shift_JIS', lang: 'ja' },
|
||||||
|
{ confidence: 10, name: 'Big5', lang: 'zh' },
|
||||||
|
{ confidence: 10, name: 'EUC-JP', lang: 'ja' },
|
||||||
|
{ confidence: 10, name: 'EUC-KR', lang: 'ko' },
|
||||||
|
{ confidence: 10, name: 'GB18030', lang: 'zh' },
|
||||||
|
]);
|
||||||
|
};
|
||||||
|
|
||||||
|
main()
|
||||||
|
.then(() => console.log(' > test-build.ts OK'))
|
||||||
|
.catch((err) => {
|
||||||
|
console.error(err);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
12
README.md
12
README.md
|
@ -21,19 +21,19 @@ npm i chardet
|
||||||
To return the encoding with the highest confidence:
|
To return the encoding with the highest confidence:
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
const chardet = require('chardet');
|
import chardet from 'chardet';
|
||||||
|
|
||||||
chardet.detect(Buffer.from('hello there!'));
|
const encoding = chardet.detect(Buffer.from('hello there!'));
|
||||||
// or
|
// or
|
||||||
chardet.detectFile('/path/to/file').then(encoding => console.log(encoding));
|
const encoding = await chardet.detectFile('/path/to/file');
|
||||||
// or
|
// or
|
||||||
chardet.detectFileSync('/path/to/file');
|
const encoding = chardet.detectFileSync('/path/to/file');
|
||||||
```
|
```
|
||||||
|
|
||||||
To return the full list of possible encodings use `analyse` method.
|
To return the full list of possible encodings use `analyse` method.
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
const chardet = require('chardet');
|
import chardet from 'chardet';
|
||||||
chardet.analyse(Buffer.from('hello there!'));
|
chardet.analyse(Buffer.from('hello there!'));
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ Returned value is an array of objects sorted by confidence value in descending or
|
||||||
|
|
||||||
## Working with large data sets
|
## Working with large data sets
|
||||||
|
|
||||||
Sometimes, when data set is huge and you want to optimize performace (in tradeoff of less accuracy),
|
Sometimes, when the data set is huge and you want to optimize performance (with a tradeoff of less accuracy),
|
||||||
you can sample only first N bytes of the buffer:
|
you can sample only the first N bytes of the buffer:
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
|
|
|
@ -39,6 +39,7 @@
|
||||||
"prettier": "^2.1.2",
|
"prettier": "^2.1.2",
|
||||||
"semantic-release": "^17.1.2",
|
"semantic-release": "^17.1.2",
|
||||||
"ts-jest": "^26.4.0",
|
"ts-jest": "^26.4.0",
|
||||||
|
"ts-node": "^10.9.1",
|
||||||
"typescript": "^4.8.4"
|
"typescript": "^4.8.4"
|
||||||
},
|
},
|
||||||
"keywords": [
|
"keywords": [
|
||||||
|
|
Loading…
Reference in New Issue