Merge pull request #64 from crisp-dev/master
feat: allow position offset as option
This commit is contained in:
commit
685cba81b3
14
README.md
14
README.md
|
@@ -37,7 +37,7 @@ import chardet from 'chardet';
|
|||
chardet.analyse(Buffer.from('hello there!'));
|
||||
```
|
||||
|
||||
Returned value is an array of objects sorted by confidence value in decending order
|
||||
Returned value is an array of objects sorted by confidence value in descending order
|
||||
|
||||
```javascript
|
||||
[
|
||||
|
@@ -48,8 +48,8 @@ Returned value is an array of objects sorted by confidence value in decending or
|
|||
|
||||
## Working with large data sets
|
||||
|
||||
Sometimes, when data set is huge and you want to optimize performace (with a tradeoff of less accuracy),
|
||||
you can sample only first N bytes of the buffer:
|
||||
Sometimes, when data set is huge and you want to optimize performance (with a tradeoff of less accuracy),
|
||||
you can sample only the first N bytes of the buffer:
|
||||
|
||||
```javascript
|
||||
chardet
|
||||
|
@@ -57,6 +57,14 @@ chardet
|
|||
.then(encoding => console.log(encoding));
|
||||
```
|
||||
|
||||
You can also specify where to begin reading from in the buffer:
|
||||
|
||||
```javascript
|
||||
chardet
|
||||
.detectFile('/path/to/file', { sampleSize: 32, offset: 128 })
|
||||
.then(encoding => console.log(encoding));
|
||||
```
|
||||
|
||||
## Supported Encodings:
|
||||
|
||||
- UTF-8
|
||||
|
|
|
@@ -40,6 +40,11 @@ describe('chardet', () => {
|
|||
const res = await chardet.detectFile(path, { sampleSize: 32 });
|
||||
expect(res).toBe('UTF-8');
|
||||
});
|
||||
|
||||
it('should detect encoding with smaller sample size and offset', async () => {
|
||||
const res = await chardet.detectFile(path, { sampleSize: 32, offset: 64 });
|
||||
expect(res).toBe('UTF-8');
|
||||
});
|
||||
});
|
||||
|
||||
describe('#detectFileSync', () => {
|
||||
|
@@ -50,6 +55,10 @@ describe('chardet', () => {
|
|||
it('should detect encoding with smaller sample size', () => {
|
||||
expect(chardet.detectFileSync(path, { sampleSize: 32 })).toBe('UTF-8');
|
||||
});
|
||||
|
||||
it('should detect encoding with smaller sample size and offset', () => {
|
||||
expect(chardet.detectFileSync(path, { sampleSize: 32, offset: 64 })).toBe('UTF-8');
|
||||
});
|
||||
});
|
||||
|
||||
describe('#analyse', () => {
|
||||
|
|
|
@@ -10,7 +10,8 @@ import * as sbcs from './encoding/sbcs';
|
|||
import * as iso2022 from './encoding/iso2022';
|
||||
|
||||
interface FullOptions {
|
||||
sampleSize: number
|
||||
sampleSize: number,
|
||||
offset: number
|
||||
}
|
||||
|
||||
type Options = Partial<FullOptions>
|
||||
|
@@ -107,7 +108,7 @@ export const detectFile = (filepath: string, opts: Options = {}): Promise<Detect
|
|||
fd = fs.openSync(filepath, 'r');
|
||||
const sample: Buffer = Buffer.allocUnsafe(opts.sampleSize);
|
||||
|
||||
fs.read(fd, sample, 0, opts.sampleSize, null, (err?: Error) => {
|
||||
fs.read(fd, sample, 0, opts.sampleSize, opts.offset, (err?: Error) => {
|
||||
handler(err, sample);
|
||||
});
|
||||
return;
|
||||
|
@@ -123,7 +124,7 @@ export const detectFileSync = (filepath: string, opts: Options = {}): DetectResu
|
|||
const fd = fs.openSync(filepath, 'r');
|
||||
const sample = Buffer.allocUnsafe(opts.sampleSize);
|
||||
|
||||
fs.readSync(fd, sample, 0, opts.sampleSize);
|
||||
fs.readSync(fd, sample, 0, opts.sampleSize, opts.offset);
|
||||
fs.closeSync(fd);
|
||||
return detect(sample);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue