feat: Add ASCII encoding support (#78)
This commit is contained in:
parent
29cb821fb3
commit
7f95705920
|
@ -8,6 +8,7 @@ assert(typeof chardet.detectFile, 'function');
|
||||||
assert(typeof chardet.detectFileSync, 'function');
|
assert(typeof chardet.detectFileSync, 'function');
|
||||||
|
|
||||||
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
||||||
|
{ confidence: 100, name: 'ASCII', lang: undefined },
|
||||||
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
||||||
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
||||||
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
||||||
|
|
|
@ -9,6 +9,7 @@ const main = async () => {
|
||||||
assert(typeof chardet.detectFileSync, 'function');
|
assert(typeof chardet.detectFileSync, 'function');
|
||||||
|
|
||||||
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
assert.deepStrictEqual(chardet.analyse(Buffer.from('This is a test')), [
|
||||||
|
{ confidence: 100, name: 'ASCII', lang: undefined },
|
||||||
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
{ confidence: 98, name: 'ISO-8859-1', lang: 'en' },
|
||||||
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
{ confidence: 98, name: 'ISO-8859-2', lang: 'hu' },
|
||||||
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
{ confidence: 10, name: 'UTF-8', lang: undefined },
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
import * as chardet from '..';
|
||||||
|
|
||||||
|
// Spec for the ASCII recogniser: runs file-based detection on the `ascii`
// fixture and expects the reported encoding name to be 'ASCII'.
describe('ASCII', () => {
  it('should return ASCII', () => {
    const fixturePath = __dirname + '/../test/data/encodings/ascii';
    const detected = chardet.detectFileSync(fixturePath);

    expect(detected).toBe('ASCII');
  });
});
|
|
@ -0,0 +1,21 @@
|
||||||
|
import { Context, Recogniser } from '.';
|
||||||
|
import match, { Match } from '../match';
|
||||||
|
|
||||||
|
/**
 * Recogniser that reports the encoding name 'ASCII'.
 *
 * Input is accepted only when every examined byte lies in the range 32..126
 * inclusive. Bytes below 32 (which includes tab, line feed and carriage
 * return) and bytes above 126 are rejected.
 */
export default class Ascii implements Recogniser {
  /** Returns the encoding name reported by this recogniser. */
  name() {
    return 'ASCII';
  }

  /**
   * Scans the first `det.rawLen` bytes of `det.rawInput`.
   *
   * @param det Detection context supplying `rawInput` and `rawLen`.
   * @returns The result of the imported `match` helper, called with a
   *   confidence of 100 when no byte falls outside 32..126, or 0 as soon as
   *   one does.
   */
  match(det: Context): Match | null {
    const bytes = det.rawInput;
    let confidence = 100;

    for (let pos = 0; pos < det.rawLen; pos += 1) {
      const value = bytes[pos];
      // The first byte outside the 32..126 range settles the outcome.
      if (value > 126 || value < 32) {
        confidence = 0;
        break;
      }
    }

    return match(det, this, confidence);
  }
}
|
|
@ -15,6 +15,7 @@ describe('chardet', () => {
|
||||||
{ 'confidence': 6, 'name': 'windows-1250', 'lang': 'pl' },
|
{ 'confidence': 6, 'name': 'windows-1250', 'lang': 'pl' },
|
||||||
{ 'confidence': 4, 'name': 'windows-1254', 'lang': 'tr' },
|
{ 'confidence': 4, 'name': 'windows-1254', 'lang': 'tr' },
|
||||||
{ 'confidence': 2, 'name': 'windows-1251', 'lang': 'ru' },
|
{ 'confidence': 2, 'name': 'windows-1251', 'lang': 'ru' },
|
||||||
|
{ 'confidence': 0, 'name': 'ASCII', 'lang': undefined },
|
||||||
];
|
];
|
||||||
|
|
||||||
it('has both named and default exports', () => {
|
it('has both named and default exports', () => {
|
||||||
|
|
|
@ -3,6 +3,7 @@ import { Recogniser, Context } from './encoding';
|
||||||
|
|
||||||
import loadFs from './fs/node';
|
import loadFs from './fs/node';
|
||||||
|
|
||||||
|
import Ascii from './encoding/ascii';
|
||||||
import Utf8 from './encoding/utf8';
|
import Utf8 from './encoding/utf8';
|
||||||
import * as unicode from './encoding/unicode';
|
import * as unicode from './encoding/unicode';
|
||||||
import * as mbcs from './encoding/mbcs';
|
import * as mbcs from './encoding/mbcs';
|
||||||
|
@ -40,6 +41,7 @@ const recognisers: Recogniser[] = [
|
||||||
new sbcs.windows_1251(),
|
new sbcs.windows_1251(),
|
||||||
new sbcs.windows_1256(),
|
new sbcs.windows_1256(),
|
||||||
new sbcs.KOI8_R(),
|
new sbcs.KOI8_R(),
|
||||||
|
new Ascii(),
|
||||||
];
|
];
|
||||||
|
|
||||||
export type AnalyseResult = Match[];
|
export type AnalyseResult = Match[];
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
|
Loading…
Reference in New Issue