Merge pull request #64 from crisp-dev/master
feat: allow position offset as option
This commit is contained in:
commit
685cba81b3
14
README.md
14
README.md
|
@@ -37,7 +37,7 @@ import chardet from 'chardet';
|
||||||
chardet.analyse(Buffer.from('hello there!'));
|
chardet.analyse(Buffer.from('hello there!'));
|
||||||
```
|
```
|
||||||
|
|
||||||
Returned value is an array of objects sorted by confidence value in decending order
|
Returned value is an array of objects sorted by confidence value in descending order
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
[
|
[
|
||||||
|
@@ -48,8 +48,8 @@ Returned value is an array of objects sorted by confidence value in decending or
|
||||||
|
|
||||||
## Working with large data sets
|
## Working with large data sets
|
||||||
|
|
||||||
Sometimes, when data set is huge and you want to optimize performace (with a tradeoff of less accuracy),
|
Sometimes, when data set is huge and you want to optimize performance (with a tradeoff of less accuracy),
|
||||||
you can sample only first N bytes of the buffer:
|
you can sample only the first N bytes of the buffer:
|
||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
chardet
|
chardet
|
||||||
|
@@ -57,6 +57,14 @@ chardet
|
||||||
.then(encoding => console.log(encoding));
|
.then(encoding => console.log(encoding));
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You can also specify where to begin reading from in the buffer:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
chardet
|
||||||
|
.detectFile('/path/to/file', { sampleSize: 32, offset: 128 })
|
||||||
|
.then(encoding => console.log(encoding));
|
||||||
|
```
|
||||||
|
|
||||||
## Supported Encodings:
|
## Supported Encodings:
|
||||||
|
|
||||||
- UTF-8
|
- UTF-8
|
||||||
|
|
|
@@ -40,6 +40,11 @@ describe('chardet', () => {
|
||||||
const res = await chardet.detectFile(path, { sampleSize: 32 });
|
const res = await chardet.detectFile(path, { sampleSize: 32 });
|
||||||
expect(res).toBe('UTF-8');
|
expect(res).toBe('UTF-8');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should detect encoding with smaller sample size and offset', async () => {
|
||||||
|
const res = await chardet.detectFile(path, { sampleSize: 32, offset: 64 });
|
||||||
|
expect(res).toBe('UTF-8');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('#detectFileSync', () => {
|
describe('#detectFileSync', () => {
|
||||||
|
@@ -50,6 +55,10 @@ describe('chardet', () => {
|
||||||
it('should detect encoding with smaller sample size', () => {
|
it('should detect encoding with smaller sample size', () => {
|
||||||
expect(chardet.detectFileSync(path, { sampleSize: 32 })).toBe('UTF-8');
|
expect(chardet.detectFileSync(path, { sampleSize: 32 })).toBe('UTF-8');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should detect encoding with smaller sample size and offset', () => {
|
||||||
|
expect(chardet.detectFileSync(path, { sampleSize: 32, offset: 64 })).toBe('UTF-8');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('#analyse', () => {
|
describe('#analyse', () => {
|
||||||
|
|
|
@@ -10,7 +10,8 @@ import * as sbcs from './encoding/sbcs';
|
||||||
import * as iso2022 from './encoding/iso2022';
|
import * as iso2022 from './encoding/iso2022';
|
||||||
|
|
||||||
interface FullOptions {
|
interface FullOptions {
|
||||||
sampleSize: number
|
sampleSize: number,
|
||||||
|
offset: number
|
||||||
}
|
}
|
||||||
|
|
||||||
type Options = Partial<FullOptions>
|
type Options = Partial<FullOptions>
|
||||||
|
@@ -107,7 +108,7 @@ export const detectFile = (filepath: string, opts: Options = {}): Promise<Detect
|
||||||
fd = fs.openSync(filepath, 'r');
|
fd = fs.openSync(filepath, 'r');
|
||||||
const sample: Buffer = Buffer.allocUnsafe(opts.sampleSize);
|
const sample: Buffer = Buffer.allocUnsafe(opts.sampleSize);
|
||||||
|
|
||||||
fs.read(fd, sample, 0, opts.sampleSize, null, (err?: Error) => {
|
fs.read(fd, sample, 0, opts.sampleSize, opts.offset, (err?: Error) => {
|
||||||
handler(err, sample);
|
handler(err, sample);
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
|
@@ -123,7 +124,7 @@ export const detectFileSync = (filepath: string, opts: Options = {}): DetectResu
|
||||||
const fd = fs.openSync(filepath, 'r');
|
const fd = fs.openSync(filepath, 'r');
|
||||||
const sample = Buffer.allocUnsafe(opts.sampleSize);
|
const sample = Buffer.allocUnsafe(opts.sampleSize);
|
||||||
|
|
||||||
fs.readSync(fd, sample, 0, opts.sampleSize);
|
fs.readSync(fd, sample, 0, opts.sampleSize, opts.offset);
|
||||||
fs.closeSync(fd);
|
fs.closeSync(fd);
|
||||||
return detect(sample);
|
return detect(sample);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue