Merge pull request #37 from runk/major-dev-deps

Major dev deps
Dmitry Shirokov 2020-09-23 14:02:03 +10:00 committed by GitHub
commit b1761347d5
17 changed files with 4274 additions and 1918 deletions

.eslintignore (new file)

@@ -0,0 +1,2 @@
+lib
+jest.config.js

.eslintrc.json (new file)

@@ -0,0 +1,10 @@
+{
+  "root": true,
+  "parser": "@typescript-eslint/parser",
+  "plugins": ["@typescript-eslint"],
+  "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
+  "rules": {
+    "@typescript-eslint/no-unused-vars": ["warn", { "varsIgnorePattern": "_" }],
+    "@typescript-eslint/no-inferrable-types": ["off"]
+  }
+}
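For context on the rules block: unused variables produce a warning unless their name matches the "_" pattern, and explicit types on trivially inferred initialisers are allowed because no-inferrable-types is switched off. A minimal TypeScript sketch of what that means in practice (the identifiers are illustrative, not from this repository):

    // Warned by @typescript-eslint/no-unused-vars: declared but never read.
    const leftoverValue = 42;

    // Not warned: the name matches the "varsIgnorePattern": "_" regex.
    const _leftoverValue = 42;

    // Not warned by no-inferrable-types, since that rule is off in this config.
    const answer: number = 21;
    console.log(answer * 2);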

.travis.yml

@@ -1,8 +1,8 @@
 language: node_js
 node_js:
-  - "8"
   - "10"
   - "12"
+  - "14"

 jobs:
   include:

package-lock.json (generated; 5939 changed lines, diff suppressed because it is too large)

package.json

@@ -14,7 +14,7 @@
   },
   "scripts": {
     "build": "rm -rf lib/* && tsc",
-    "lint": "tslint -p tsconfig.json -c tslint.json",
+    "lint": "eslint . --ext .js,.jsx,.ts,.tsx",
     "lint:types": "tsc --noEmit",
     "format": "prettier --write ./src/**/*.ts",
     "format:check": "prettier --list-different ./src/**/*.ts",

@@ -33,14 +33,16 @@
     "test": "test"
   },
   "devDependencies": {
-    "@types/jest": "^25.1.4",
-    "@types/node": "^13.9.5",
-    "jest": "^25.2.4",
-    "prettier": "^2.0.2",
-    "semantic-release": "^15.14.0",
-    "ts-jest": "^25.2.1",
-    "tslint": "^6.1.0",
-    "typescript": "^3.8.3"
+    "@types/jest": "^26.0.14",
+    "@types/node": "^14.11.2",
+    "@typescript-eslint/eslint-plugin": "^4.2.0",
+    "@typescript-eslint/parser": "^4.2.0",
+    "eslint": "^7.9.0",
+    "jest": "^26.4.2",
+    "prettier": "^2.1.2",
+    "semantic-release": "^17.1.2",
+    "ts-jest": "^26.4.0",
+    "typescript": "^4.0.3"
   },
   "keywords": [
     "encoding",

[ISO-2022 spec]

@@ -1,7 +1,7 @@
 import * as chardet from '..';

 describe('ISO-2022', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';

   it('should return ISO-2022-JP', () => {
     expect(chardet.detectFileSync(base + '/iso2022jp')).toBe('ISO-2022-JP');
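The spec above exercises the file-based API. For readers unfamiliar with the library, a minimal usage sketch (the buffer contents are made up; detectFileSync is the call used in the spec, and detect on a Buffer is its in-memory counterpart):

    import * as chardet from 'chardet';

    // Synchronous detection from a file path, as in the spec above.
    const fromFile = chardet.detectFileSync('test/data/encodings/iso2022jp');
    console.log(fromFile); // expected: 'ISO-2022-JP'

    // Detection from an in-memory buffer.
    const fromBuffer = chardet.detect(Buffer.from('hello, world'));
    console.log(fromBuffer);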

[ISO-2022 recogniser (src)]

@@ -1,6 +1,5 @@
 import { Context, Recogniser } from '.';
-
-var match = require('../match').default;
+import match, { Match } from '../match';

 /**
  * This is a superclass for the individual detectors for

@@ -15,7 +14,7 @@ class ISO_2022 implements Recogniser {
     return 'ISO_2022';
   }

-  match(det: Context) {
+  match(det: Context): Match | null {
     /**
      * Matching function shared among the 2022 detectors JP, CN and KR
      * Counts up the number of legal an unrecognized escape sequences in

@@ -29,16 +28,16 @@ class ISO_2022 implements Recogniser {
     * @return match quality, in the range of 0-100.
     */

-    var i, j;
-    var escN;
-    var hits = 0;
-    var misses = 0;
-    var shifts = 0;
-    var quality;
+    let i, j;
+    let escN;
+    let hits = 0;
+    let misses = 0;
+    let shifts = 0;
+    let quality;

     // TODO: refactor me
-    var text = det.fInputBytes;
-    var textLen = det.fInputLen;
+    const text = det.fInputBytes;
+    const textLen = det.fInputLen;

     scanInput: for (i = 0; i < textLen; i++) {
       if (text[i] == 0x1b) {

@@ -47,7 +46,7 @@ class ISO_2022 implements Recogniser {
         escN < this.escapeSequences.length;
         escN++
       ) {
-        var seq = this.escapeSequences[escN];
+        const seq = this.escapeSequences[escN];

        if (textLen - i < seq.length) continue checkEscapes;
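The hunks above only retype the bookkeeping variables; the approach itself is unchanged: scan for the ESC byte (0x1b) and compare what follows against a table of known escape sequences, counting hits and misses. A simplified, hypothetical sketch of that kind of counting loop (the table and scoring here are illustrative, not the module's actual data):

    // Count ESC sequences that match a known table vs. unrecognised ones.
    function countEscapes(text: Uint8Array, escapeSequences: number[][]) {
      let hits = 0;
      let misses = 0;
      scanInput: for (let i = 0; i < text.length; i++) {
        if (text[i] !== 0x1b) continue;
        for (const seq of escapeSequences) {
          if (text.length - i < seq.length) continue;
          if (seq.every((b, j) => text[i + j] === b)) {
            hits++;
            continue scanInput;
          }
        }
        misses++; // ESC seen, but no known sequence matched
      }
      return { hits, misses };
    }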

[Multibyte character set spec]

@@ -1,7 +1,7 @@
 import * as chardet from '..';

 describe('Multibyte Character Sets', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';

   it('should return Shift_JIS', () => {
     expect(chardet.detectFileSync(base + '/shiftjis')).toBe('Shift_JIS');

[Multibyte (mbcs) recognisers (src)]

@@ -1,5 +1,5 @@
 import { Context, Recogniser } from '.';
-var match = require('../match').default;
+import match, { Match } from '../match';

 /**
  * Binary search implementation (recursive)

@@ -18,7 +18,7 @@ function binarySearch(arr: number[], searchValue: number) {
    There is a bug in the above line;
    Joshua Bloch suggests the following replacement:
    */
-    var mid = Math.floor((left + right) >>> 1);
+    const mid = Math.floor((left + right) >>> 1);

    if (searchValue > arr[mid]) return find(arr, searchValue, mid + 1, right);
    if (searchValue < arr[mid]) return find(arr, searchValue, left, mid - 1);
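The comment kept above refers to the classic midpoint overflow bug in Java's binarySearch that Joshua Bloch described: (left + right) / 2 can overflow, while the shifted form does not for values in range. In JavaScript, >>> 1 also truncates to an integer on its own, so the Math.floor wrapper is effectively a no-op. A tiny illustrative check with made-up values:

    const left = 0;
    const right = 2000000001;
    // Unsigned right shift halves and truncates in one step.
    const mid = (left + right) >>> 1;
    console.log(mid); // 1000000000
    console.log(Math.floor((left + right) / 2) === mid); // true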
@@ -68,7 +68,7 @@ class IteratedChar {
      this.done = true;
      return -1;
    }
-    var byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
+    const byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
    return byteValue;
  }
}

@@ -97,15 +97,15 @@ class mbcs implements Recogniser {
   * bits 0-7: the match confidence, ranging from 0-100
   * bits 8-15: The match reason, an enum-like value.
   */
-  match(det: Context) {
-    var singleByteCharCount = 0, //TODO Do we really need this?
+  match(det: Context): Match | null {
+    let singleByteCharCount = 0, //TODO Do we really need this?
      doubleByteCharCount = 0,
      commonCharCount = 0,
      badCharCount = 0,
      totalCharCount = 0,
      confidence = 0;
-    var iter = new IteratedChar();
+    const iter = new IteratedChar();

    detectBlock: {
      for (iter.reset(); this.nextChar(iter, det); ) {

@@ -113,7 +113,7 @@ class mbcs implements Recogniser {
        if (iter.error) {
          badCharCount++;
        } else {
-          var cv = iter.charValue & 0xffffffff;
+          const cv = iter.charValue & 0xffffffff;

          if (cv <= 0xff) {
            singleByteCharCount++;

@@ -159,7 +159,7 @@ class mbcs implements Recogniser {
      }

      if (this.commonChars == null) {
-        // We have no statistics on frequently occuring characters.
+        // We have no statistics on frequently occurring characters.
        // Assess confidence purely on having a reasonable number of
        // multi-byte characters (the more the better
        confidence = 30 + doubleByteCharCount - 20 * badCharCount;

@@ -167,12 +167,9 @@ class mbcs implements Recogniser {
          confidence = 100;
        }
      } else {
-        //
        // Frequency of occurrence statistics exist.
-        //
-        // @ts-ignore
-        var maxVal = Math.log(parseFloat(doubleByteCharCount) / 4);
-        var scaleFactor = 90.0 / maxVal;
+        const maxVal = Math.log(doubleByteCharCount / 4);
+        const scaleFactor = 90.0 / maxVal;
        confidence = Math.floor(
          Math.log(commonCharCount + 1) * scaleFactor + 10
        );
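For a rough sense of the scaling in the hunk above: when frequency statistics exist, confidence grows with the log of the common-character count, and the scale factor is chosen so that roughly doubleByteCharCount / 4 common characters land near 100. A small worked sketch with made-up counts (not taken from any real detection run):

    // Illustrative numbers only.
    const doubleByteCharCount = 1000;
    const commonCharCount = 50;

    const maxVal = Math.log(doubleByteCharCount / 4); // ln(250) ≈ 5.52
    const scaleFactor = 90.0 / maxVal; // ≈ 16.3
    const confidence = Math.floor(Math.log(commonCharCount + 1) * scaleFactor + 10);
    console.log(confidence); // 74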
@@ -278,14 +275,13 @@ export class sjis extends mbcs {
    iter.index = iter.nextIndex;
    iter.error = false;

-    var firstByte;
-    firstByte = iter.charValue = iter.nextByte(det);
+    const firstByte = (iter.charValue = iter.nextByte(det));
    if (firstByte < 0) return false;

    if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf))
      return true;

-    var secondByte = iter.nextByte(det);
+    const secondByte = iter.nextByte(det);
    if (secondByte < 0) return false;

    iter.charValue = (firstByte << 8) | secondByte;

@@ -418,14 +414,14 @@ export class big5 extends mbcs {
    iter.index = iter.nextIndex;
    iter.error = false;

-    var firstByte = (iter.charValue = iter.nextByte(det));
+    const firstByte = (iter.charValue = iter.nextByte(det));
    if (firstByte < 0) return false;

    // single byte character.
    if (firstByte <= 0x7f || firstByte == 0xff) return true;

-    var secondByte = iter.nextByte(det);
+    const secondByte = iter.nextByte(det);
    if (secondByte < 0) return false;

@@ -450,9 +446,9 @@
function eucNextChar(iter: IteratedChar, det: Context) {
  iter.index = iter.nextIndex;
  iter.error = false;
-  var firstByte = 0;
-  var secondByte = 0;
-  var thirdByte = 0;
+  let firstByte = 0;
+  let secondByte = 0;
+  let thirdByte = 0;
  //int fourthByte = 0;
  buildChar: {
    firstByte = iter.charValue = iter.nextByte(det);

@@ -763,10 +759,10 @@ export class gb_18030 extends mbcs {
  nextChar(iter: IteratedChar, det: Context) {
    iter.index = iter.nextIndex;
    iter.error = false;
-    var firstByte = 0;
-    var secondByte = 0;
-    var thirdByte = 0;
-    var fourthByte = 0;
+    let firstByte = 0;
+    let secondByte = 0;
+    let thirdByte = 0;
+    let fourthByte = 0;
    buildChar: {
      firstByte = iter.charValue = iter.nextByte(det);
      if (firstByte < 0) {

[Single-byte character set spec]

@@ -1,7 +1,7 @@
 import * as chardet from '..';

 describe('Singlebyte Character Sets', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';

   it('should return ISO-8859-1 (English)', () => {
     expect(chardet.detectFileSync(base + '/iso88591_en')).toBe('ISO-8859-1');

[Single-byte (sbcs) recogniser (src)]

@@ -1,13 +1,12 @@
 import { Context, Recogniser } from '../encoding/index';
-
-var match = require('../match').default;
+import match, { Match } from '../match';

 /**
  * This class recognizes single-byte encodings. Because the encoding scheme is so
  * simple, language statistics are used to do the matching.
  */

-var N_GRAM_MASK = 0xffffff;
+const N_GRAM_MASK = 0xffffff;

 class NGramParser {
   byteIndex: number = 0;

@@ -31,7 +30,7 @@ class NGramParser {
   * Binary search for value in table, which must have exactly 64 entries.
   */
  search(table: number[], value: number) {
-    var index = 0;
+    let index = 0;

    if (table[index + 32] <= value) index += 32;
    if (table[index + 16] <= value) index += 16;

@@ -65,12 +64,12 @@ class NGramParser {
  }

  parse(det: Context, spaceCh: number) {
-    var b,
+    let b,
      ignoreSpace = false;
    this.spaceChar = spaceCh;

    while ((b = this.nextByte(det)) >= 0) {
-      var mb = this.byteMap[b];
+      const mb = this.byteMap[b];

      // TODO: 0x20 might not be a space in all character sets...
      if (mb != 0) {

@@ -85,7 +84,7 @@ class NGramParser {
    // TODO: Is this OK? The buffer could have ended in the middle of a word...
    this.addByte(this.spaceChar);

-    var rawPercent = this.hitCount / this.ngramCount;
+    const rawPercent = this.hitCount / this.ngramCount;

    // TODO - This is a bit of a hack to take care of a case
    // were we were getting a confidence of 135...

@@ -119,35 +118,34 @@ class sbcs implements Recogniser {
    return [];
  }

-  // @ts-ignore
  name(input: Context): string {
    return 'sbcs';
  }

-  match(det: Context) {
-    var ngrams = this.ngrams();
+  match(det: Context): Match | null {
+    const ngrams = this.ngrams();

    if (isFlatNgrams(ngrams)) {
-      var parser = new NGramParser(ngrams, this.byteMap());
-      var confidence = parser.parse(det, this.spaceChar);
+      const parser = new NGramParser(ngrams, this.byteMap());
+      const confidence = parser.parse(det, this.spaceChar);
      return confidence <= 0 ? null : match(det, this, confidence);
    }

-    var bestConfidenceSoFar = -1;
-    var lang = null;
+    let bestConfidenceSoFar = -1;
+    let lang;

-    for (var i = ngrams.length - 1; i >= 0; i--) {
-      var ngl = ngrams[i];
-      var parser = new NGramParser(ngl.fNGrams, this.byteMap());
-      var confidence = parser.parse(det, this.spaceChar);
+    for (let i = ngrams.length - 1; i >= 0; i--) {
+      const ngl = ngrams[i];
+      const parser = new NGramParser(ngl.fNGrams, this.byteMap());
+      const confidence = parser.parse(det, this.spaceChar);

      if (confidence > bestConfidenceSoFar) {
        bestConfidenceSoFar = confidence;
        lang = ngl.fLang;
      }
    }

-    var name = this.name(det);
+    const name = this.name(det);
    return bestConfidenceSoFar <= 0
      ? null
      : match(det, this, bestConfidenceSoFar, name, lang);

[Unicode spec]

@@ -1,7 +1,7 @@
 import * as chardet from '..';

 describe('Unicode', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';

   it('should return UTF-16LE', () => {
     expect(chardet.detectFileSync(base + '/utf16le')).toBe('UTF-16LE');

[UTF-16 / UTF-32 recognisers (src)]

@@ -1,5 +1,5 @@
 import { Context, Recogniser } from '.';
-const match = require('../match').default;
+import match, { Match } from '../match';

 /**
  * This class matches UTF-16 and UTF-32, both big- and little-endian. The

@@ -10,8 +10,8 @@ export class UTF_16BE implements Recogniser {
    return 'UTF-16BE';
  }

-  match(det: Context) {
-    var input = det.fRawInput;
+  match(det: Context): Match | null {
+    const input = det.fRawInput;

    if (
      input.length >= 2 &&

@@ -30,8 +30,8 @@ export class UTF_16LE implements Recogniser {
  name() {
    return 'UTF-16LE';
  }

-  match(det: Context) {
-    var input = det.fRawInput;
+  match(det: Context): Match | null {
+    const input = det.fRawInput;
    if (
      input.length >= 2 &&

@@ -64,13 +64,13 @@ class UTF_32 implements Recogniser, WithGetChar {
    return -1;
  }

-  match(det: Context) {
-    var input = det.fRawInput,
-      limit = (det.fRawLength / 4) * 4,
-      numValid = 0,
+  match(det: Context): Match | null {
+    let numValid = 0,
      numInvalid = 0,
      hasBOM = false,
      confidence = 0;
+    const limit = (det.fRawLength / 4) * 4;
+    const input = det.fRawInput;

    if (limit == 0) {
      return null;

@@ -80,8 +80,8 @@ class UTF_32 implements Recogniser, WithGetChar {
      hasBOM = true;
    }

-    for (var i = 0; i < limit; i += 4) {
-      var ch = this.getChar(input, i);
+    for (let i = 0; i < limit; i += 4) {
+      const ch = this.getChar(input, i);

      if (ch < 0 || ch >= 0x10ffff || (ch >= 0xd800 && ch <= 0xdfff)) {
        numInvalid += 1;

[UTF-8 recogniser (src)]

@@ -1,19 +1,18 @@
 import { Context, Recogniser } from '.';
-
-var match = require('../match').default;
+import match, { Match } from '../match';

 export default class Utf8 implements Recogniser {
   name() {
     return 'UTF-8';
   }

-  match(det: Context) {
-    var hasBOM = false,
+  match(det: Context): Match | null {
+    let hasBOM = false,
      numValid = 0,
      numInvalid = 0,
-      input = det.fRawInput,
      trailBytes = 0,
      confidence;
+    const input = det.fRawInput;

    if (
      det.fRawLength >= 3 &&

@@ -25,8 +24,8 @@ export default class Utf8 implements Recogniser {
    }

    // Scan for multi-byte sequences
-    for (var i = 0; i < det.fRawLength; i++) {
-      var b = input[i];
+    for (let i = 0; i < det.fRawLength; i++) {
+      const b = input[i];
      if ((b & 0x80) == 0) continue; // ASCII

      // Hi bit on char found. Figure out how long the sequence should be
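The loop above looks at every byte with the high bit set; the standard way to decide how many continuation bytes should follow is to test the top bits of the lead byte. A self-contained sketch of that classification (not the module's actual code):

    // Continuation bytes implied by a UTF-8 lead byte, or -1 if it cannot start a sequence.
    function trailBytesFor(b: number): number {
      if ((b & 0x80) === 0x00) return 0; // 0xxxxxxx: ASCII
      if ((b & 0xe0) === 0xc0) return 1; // 110xxxxx: 2-byte sequence
      if ((b & 0xf0) === 0xe0) return 2; // 1110xxxx: 3-byte sequence
      if ((b & 0xf8) === 0xf0) return 3; // 11110xxx: 4-byte sequence
      return -1; // 10xxxxxx (stray continuation) or an invalid lead byte
    }

    console.log(trailBytesFor(0x24)); // 0  ('$')
    console.log(trailBytesFor(0xe2)); // 2  (e.g. first byte of '€')
    console.log(trailBytesFor(0x80)); // -1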

[match helper (src)]

@@ -1,12 +1,12 @@
+import { Context, Recogniser } from "./encoding";
 export interface Match {
   confidence: number;
   name: string;
-  lang: string;
+  lang?: string;
 }

-// @ts-ignore
-export default (det, rec, confidence, name, lang): Match => ({
+export default (det: Context, rec: Recogniser, confidence: number, name?: string, lang?: string): Match => ({
   confidence,
   name: name || rec.name(det),
   lang,
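With the factory now typed, a recogniser builds its result through match() instead of an untyped object literal. A hypothetical minimal recogniser showing the call shape (the class, the fixed confidence of 42, and the relative import paths are illustrative assumptions; real recognisers compute confidence from the input):

    import match, { Match } from './match';
    import { Context, Recogniser } from './encoding';

    export class AlwaysConfident implements Recogniser {
      name(): string {
        return 'ASCII';
      }

      match(det: Context): Match | null {
        const confidence = 42; // illustrative fixed value
        return confidence <= 0 ? null : match(det, this, confidence);
      }
    }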

tsconfig.json

@@ -14,7 +14,7 @@
     "removeComments": true,
     "sourceMap": true,
     "strict": true,
-    "target": "esnext"
+    "target": "ES2019"
   },
-  "exclude": ["node_modules", "**/*.spec.ts", "**/*.test.ts", "__mocks__"]
+  "exclude": ["node_modules", "**/*.spec.ts", "**/*.test.ts", "__mocks__", "lib"]
 }

tslint.json (deleted)

@@ -1,23 +0,0 @@
-{
-  "extends": "tslint:recommended",
-  "rules": {
-    "interface-name": [true, "never-prefix"],
-    "quotemark": [true, "single"],
-    "no-bitwise": false,
-    "trailing-comma": [
-      true,
-      {
-        "multiline": {
-          "objects": "always",
-          "arrays": "always",
-          "functions": "never",
-          "typeLiterals": "ignore"
-        },
-        "esSpecCompliant": true
-      }
-    ],
-    "object-literal-sort-keys": false,
-    "radix": false,
-    "forin": false
-  }
-}