Merge pull request #37 from runk/major-dev-deps

Major dev deps
2020-09-23 14:02:03 +10:00 · 2020-09-23 14:02:03 +10:00 · b1761347d5
parent bbb4fed4dd edcfec1eae
commit b1761347d5
17 changed files with 4274 additions and 1918 deletions
--- a/.eslintignore
+++ b/.eslintignore
@ -0,0 +1,2 @@
 lib
 jest.config.js
--- a/.eslintrc.json
+++ b/.eslintrc.json
@ -0,0 +1,10 @@
 {
  "root": true,
  "parser": "@typescript-eslint/parser",
  "plugins": ["@typescript-eslint"],
  "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
  "rules": {
    "@typescript-eslint/no-unused-vars": ["warn", { "varsIgnorePattern": "_" }],
    "@typescript-eslint/no-inferrable-types": ["off"]
  }
 }
--- a/.travis.yml
+++ b/.travis.yml
@ -1,8 +1,8 @@
 language: node_js
 node_js:
  - "8"
  - "10"
  - "12"
  - "14"
 jobs:
  include:
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -14,7 +14,7 @@
  },
  "scripts": {
    "build": "rm -rf lib/* && tsc",
-    "lint": "tslint -p tsconfig.json -c tslint.json",
+    "lint": "eslint . --ext .js,.jsx,.ts,.tsx",
    "lint:types": "tsc --noEmit",
    "format": "prettier --write ./src/**/*.ts",
    "format:check": "prettier --list-different ./src/**/*.ts",
@ -33,14 +33,16 @@
    "test": "test"
  },
  "devDependencies": {
-    "@types/jest": "^25.1.4",
+    "@types/jest": "^26.0.14",
-    "@types/node": "^13.9.5",
+    "@types/node": "^14.11.2",
-    "jest": "^25.2.4",
+    "@typescript-eslint/eslint-plugin": "^4.2.0",
-    "prettier": "^2.0.2",
+    "@typescript-eslint/parser": "^4.2.0",
-    "semantic-release": "^15.14.0",
+    "eslint": "^7.9.0",
-    "ts-jest": "^25.2.1",
+    "jest": "^26.4.2",
-    "tslint": "^6.1.0",
+    "prettier": "^2.1.2",
-    "typescript": "^3.8.3"
+    "semantic-release": "^17.1.2",
    "ts-jest": "^26.4.0",
    "typescript": "^4.0.3"
  },
  "keywords": [
    "encoding",
--- a/src/encoding/iso2022.test.ts
+++ b/src/encoding/iso2022.test.ts
@ -1,7 +1,7 @@
 import * as chardet from '..';
 describe('ISO-2022', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';
  it('should return ISO-2022-JP', () => {
    expect(chardet.detectFileSync(base + '/iso2022jp')).toBe('ISO-2022-JP');
--- a/src/encoding/iso2022.ts
+++ b/src/encoding/iso2022.ts
@ -1,6 +1,5 @@
 import { Context, Recogniser } from '.';
-
+import match, { Match } from '../match';
 var match = require('../match').default;
 /**
 * This is a superclass for the individual detectors for
@ -15,7 +14,7 @@ class ISO_2022 implements Recogniser {
    return 'ISO_2022';
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
    /**
     * Matching function shared among the 2022 detectors JP, CN and KR
     * Counts up the number of legal and unrecognized escape sequences in
@ -29,16 +28,16 @@ class ISO_2022 implements Recogniser {
     * @return match quality, in the range of 0-100.
     */
-    var i, j;
+    let i, j;
-    var escN;
+    let escN;
-    var hits = 0;
+    let hits = 0;
-    var misses = 0;
+    let misses = 0;
-    var shifts = 0;
+    let shifts = 0;
-    var quality;
+    let quality;
    // TODO: refactor me
-    var text = det.fInputBytes;
+    const text = det.fInputBytes;
-    var textLen = det.fInputLen;
+    const textLen = det.fInputLen;
    scanInput: for (i = 0; i < textLen; i++) {
      if (text[i] == 0x1b) {
@ -47,7 +46,7 @@ class ISO_2022 implements Recogniser {
          escN < this.escapeSequences.length;
          escN++
        ) {
-          var seq = this.escapeSequences[escN];
+          const seq = this.escapeSequences[escN];
          if (textLen - i < seq.length) continue checkEscapes;
--- a/src/encoding/mbcs.test.ts
+++ b/src/encoding/mbcs.test.ts
@ -1,7 +1,7 @@
 import * as chardet from '..';
 describe('Multibyte Character Sets', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';
  it('should return Shift_JIS', () => {
    expect(chardet.detectFileSync(base + '/shiftjis')).toBe('Shift_JIS');
--- a/src/encoding/mbcs.ts
+++ b/src/encoding/mbcs.ts
@ -1,5 +1,5 @@
 import { Context, Recogniser } from '.';
-var match = require('../match').default;
+import match, { Match } from '../match';
 /**
 * Binary search implementation (recursive)
@ -18,7 +18,7 @@ function binarySearch(arr: number[], searchValue: number) {
    There is a bug in the above line;
    Joshua Bloch suggests the following replacement:
    */
-    var mid = Math.floor((left + right) >>> 1);
+    const mid = Math.floor((left + right) >>> 1);
    if (searchValue > arr[mid]) return find(arr, searchValue, mid + 1, right);
    if (searchValue < arr[mid]) return find(arr, searchValue, left, mid - 1);
@ -68,7 +68,7 @@ class IteratedChar {
      this.done = true;
      return -1;
    }
-    var byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
+    const byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
    return byteValue;
  }
 }
@ -97,15 +97,15 @@ class mbcs implements Recogniser {
   *             bits 0-7:  the match confidence, ranging from 0-100
   *             bits 8-15: The match reason, an enum-like value.
   */
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var singleByteCharCount = 0, //TODO Do we really need this?
+    let singleByteCharCount = 0, //TODO Do we really need this?
      doubleByteCharCount = 0,
      commonCharCount = 0,
      badCharCount = 0,
      totalCharCount = 0,
      confidence = 0;
-    var iter = new IteratedChar();
+    const iter = new IteratedChar();
    detectBlock: {
      for (iter.reset(); this.nextChar(iter, det); ) {
@ -113,7 +113,7 @@ class mbcs implements Recogniser {
        if (iter.error) {
          badCharCount++;
        } else {
-          var cv = iter.charValue & 0xffffffff;
+          const cv = iter.charValue & 0xffffffff;
          if (cv <= 0xff) {
            singleByteCharCount++;
@ -159,7 +159,7 @@ class mbcs implements Recogniser {
      }
      if (this.commonChars == null) {
-        // We have no statistics on frequently occuring characters.
+        // We have no statistics on frequently occurring characters.
        //  Assess confidence purely on having a reasonable number of
        //  multi-byte characters (the more the better)
        confidence = 30 + doubleByteCharCount - 20 * badCharCount;
@ -167,12 +167,9 @@ class mbcs implements Recogniser {
          confidence = 100;
        }
      } else {
        //
        // Frequency of occurrence statistics exist.
-        //
+        const maxVal = Math.log(doubleByteCharCount / 4);
-        // @ts-ignore
+        const scaleFactor = 90.0 / maxVal;
        var maxVal = Math.log(parseFloat(doubleByteCharCount) / 4);
        var scaleFactor = 90.0 / maxVal;
        confidence = Math.floor(
          Math.log(commonCharCount + 1) * scaleFactor + 10
        );
@ -278,14 +275,13 @@ export class sjis extends mbcs {
    iter.index = iter.nextIndex;
    iter.error = false;
-    var firstByte;
+    const firstByte = (iter.charValue = iter.nextByte(det));
    firstByte = iter.charValue = iter.nextByte(det);
    if (firstByte < 0) return false;
    if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf))
      return true;
-    var secondByte = iter.nextByte(det);
+    const secondByte = iter.nextByte(det);
    if (secondByte < 0) return false;
    iter.charValue = (firstByte << 8) | secondByte;
@ -418,14 +414,14 @@ export class big5 extends mbcs {
    iter.index = iter.nextIndex;
    iter.error = false;
-    var firstByte = (iter.charValue = iter.nextByte(det));
+    const firstByte = (iter.charValue = iter.nextByte(det));
    if (firstByte < 0) return false;
    // single byte character.
    if (firstByte <= 0x7f || firstByte == 0xff) return true;
-    var secondByte = iter.nextByte(det);
+    const secondByte = iter.nextByte(det);
    if (secondByte < 0) return false;
@ -450,9 +446,9 @@ export class big5 extends mbcs {
 function eucNextChar(iter: IteratedChar, det: Context) {
  iter.index = iter.nextIndex;
  iter.error = false;
-  var firstByte = 0;
+  let firstByte = 0;
-  var secondByte = 0;
+  let secondByte = 0;
-  var thirdByte = 0;
+  let thirdByte = 0;
  //int fourthByte = 0;
  buildChar: {
    firstByte = iter.charValue = iter.nextByte(det);
@ -763,10 +759,10 @@ export class gb_18030 extends mbcs {
  nextChar(iter: IteratedChar, det: Context) {
    iter.index = iter.nextIndex;
    iter.error = false;
-    var firstByte = 0;
+    let firstByte = 0;
-    var secondByte = 0;
+    let secondByte = 0;
-    var thirdByte = 0;
+    let thirdByte = 0;
-    var fourthByte = 0;
+    let fourthByte = 0;
    buildChar: {
      firstByte = iter.charValue = iter.nextByte(det);
      if (firstByte < 0) {
--- a/src/encoding/sbcs.test.ts
+++ b/src/encoding/sbcs.test.ts
@ -1,7 +1,7 @@
 import * as chardet from '..';
 describe('Singlebyte Character Sets', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';
  it('should return ISO-8859-1 (English)', () => {
    expect(chardet.detectFileSync(base + '/iso88591_en')).toBe('ISO-8859-1');
--- a/src/encoding/sbcs.ts
+++ b/src/encoding/sbcs.ts
@ -1,13 +1,12 @@
 import { Context, Recogniser } from '../encoding/index';
-
+import match, { Match } from '../match';
 var match = require('../match').default;
 /**
 * This class recognizes single-byte encodings. Because the encoding scheme is so
 * simple, language statistics are used to do the matching.
 */
-var N_GRAM_MASK = 0xffffff;
+const N_GRAM_MASK = 0xffffff;
 class NGramParser {
  byteIndex: number = 0;
@ -31,7 +30,7 @@ class NGramParser {
   * Binary search for value in table, which must have exactly 64 entries.
   */
  search(table: number[], value: number) {
-    var index = 0;
+    let index = 0;
    if (table[index + 32] <= value) index += 32;
    if (table[index + 16] <= value) index += 16;
@ -65,12 +64,12 @@ class NGramParser {
  }
  parse(det: Context, spaceCh: number) {
-    var b,
+    let b,
      ignoreSpace = false;
    this.spaceChar = spaceCh;
    while ((b = this.nextByte(det)) >= 0) {
-      var mb = this.byteMap[b];
+      const mb = this.byteMap[b];
      // TODO: 0x20 might not be a space in all character sets...
      if (mb != 0) {
@ -85,7 +84,7 @@ class NGramParser {
    // TODO: Is this OK? The buffer could have ended in the middle of a word...
    this.addByte(this.spaceChar);
-    var rawPercent = this.hitCount / this.ngramCount;
+    const rawPercent = this.hitCount / this.ngramCount;
    // TODO - This is a bit of a hack to take care of a case
    // where we were getting a confidence of 135...
@ -119,35 +118,34 @@ class sbcs implements Recogniser {
    return [];
  }
  // @ts-ignore
  name(input: Context): string {
    return 'sbcs';
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var ngrams = this.ngrams();
+    const ngrams = this.ngrams();
    if (isFlatNgrams(ngrams)) {
-      var parser = new NGramParser(ngrams, this.byteMap());
+      const parser = new NGramParser(ngrams, this.byteMap());
-      var confidence = parser.parse(det, this.spaceChar);
+      const confidence = parser.parse(det, this.spaceChar);
      return confidence <= 0 ? null : match(det, this, confidence);
    }
-    var bestConfidenceSoFar = -1;
+    let bestConfidenceSoFar = -1;
-    var lang = null;
+    let lang;
-    for (var i = ngrams.length - 1; i >= 0; i--) {
+    for (let i = ngrams.length - 1; i >= 0; i--) {
-      var ngl = ngrams[i];
+      const ngl = ngrams[i];
-      var parser = new NGramParser(ngl.fNGrams, this.byteMap());
+      const parser = new NGramParser(ngl.fNGrams, this.byteMap());
-      var confidence = parser.parse(det, this.spaceChar);
+      const confidence = parser.parse(det, this.spaceChar);
      if (confidence > bestConfidenceSoFar) {
        bestConfidenceSoFar = confidence;
        lang = ngl.fLang;
      }
    }
-    var name = this.name(det);
+    const name = this.name(det);
    return bestConfidenceSoFar <= 0
      ? null
      : match(det, this, bestConfidenceSoFar, name, lang);
--- a/src/encoding/unicode.test.ts
+++ b/src/encoding/unicode.test.ts
@ -1,7 +1,7 @@
 import * as chardet from '..';
 describe('Unicode', () => {
-  var base = __dirname + '/../test/data/encodings';
+  const base = __dirname + '/../test/data/encodings';
  it('should return UTF-16LE', () => {
    expect(chardet.detectFileSync(base + '/utf16le')).toBe('UTF-16LE');
--- a/src/encoding/unicode.ts
+++ b/src/encoding/unicode.ts
@ -1,5 +1,5 @@
 import { Context, Recogniser } from '.';
-const match = require('../match').default;
+import match, { Match } from '../match';
 /**
 * This class matches UTF-16 and UTF-32, both big- and little-endian. The
@ -10,8 +10,8 @@ export class UTF_16BE implements Recogniser {
    return 'UTF-16BE';
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var input = det.fRawInput;
+    const input = det.fRawInput;
    if (
      input.length >= 2 &&
@ -30,8 +30,8 @@ export class UTF_16LE implements Recogniser {
  name() {
    return 'UTF-16LE';
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var input = det.fRawInput;
+    const input = det.fRawInput;
    if (
      input.length >= 2 &&
@ -64,13 +64,13 @@ class UTF_32 implements Recogniser, WithGetChar {
    return -1;
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var input = det.fRawInput,
+    let numValid = 0,
      limit = (det.fRawLength / 4) * 4,
      numValid = 0,
      numInvalid = 0,
      hasBOM = false,
      confidence = 0;
    const limit = (det.fRawLength / 4) * 4;
    const input = det.fRawInput;
    if (limit == 0) {
      return null;
@ -80,8 +80,8 @@ class UTF_32 implements Recogniser, WithGetChar {
      hasBOM = true;
    }
-    for (var i = 0; i < limit; i += 4) {
+    for (let i = 0; i < limit; i += 4) {
-      var ch = this.getChar(input, i);
+      const ch = this.getChar(input, i);
      if (ch < 0 || ch >= 0x10ffff || (ch >= 0xd800 && ch <= 0xdfff)) {
        numInvalid += 1;
--- a/src/encoding/utf8.ts
+++ b/src/encoding/utf8.ts
@ -1,19 +1,18 @@
 import { Context, Recogniser } from '.';
-
+import match, { Match } from '../match';
 var match = require('../match').default;
 export default class Utf8 implements Recogniser {
  name() {
    return 'UTF-8';
  }
-  match(det: Context) {
+  match(det: Context): Match | null {
-    var hasBOM = false,
+    let hasBOM = false,
      numValid = 0,
      numInvalid = 0,
      input = det.fRawInput,
      trailBytes = 0,
      confidence;
    const input = det.fRawInput;
    if (
      det.fRawLength >= 3 &&
@ -25,8 +24,8 @@ export default class Utf8 implements Recogniser {
    }
    // Scan for multi-byte sequences
-    for (var i = 0; i < det.fRawLength; i++) {
+    for (let i = 0; i < det.fRawLength; i++) {
-      var b = input[i];
+      const b = input[i];
      if ((b & 0x80) == 0) continue; // ASCII
      // Hi bit on char found.  Figure out how long the sequence should be
--- a/src/match.ts
+++ b/src/match.ts
@ -1,12 +1,12 @@
 import { Context, Recogniser } from "./encoding";
 export interface Match {
  confidence: number;
  name: string;
-  lang: string;
+  lang?: string;
 }
-// @ts-ignore
+export default (det: Context, rec: Recogniser, confidence: number, name?: string, lang?: string): Match => ({
 export default (det, rec, confidence, name, lang): Match => ({
  confidence,
  name: name || rec.name(det),
  lang,
--- a/tsconfig.json
+++ b/tsconfig.json
@ -14,7 +14,7 @@
    "removeComments": true,
    "sourceMap": true,
    "strict": true,
-    "target": "esnext"
+    "target": "ES2019"
  },
-  "exclude": ["node_modules", "**/*.spec.ts", "**/*.test.ts", "__mocks__"]
+  "exclude": ["node_modules", "**/*.spec.ts", "**/*.test.ts", "__mocks__", "lib"]
 }
--- a/tslint.json
+++ b/tslint.json
@ -1,23 +0,0 @@
 {
  "extends": "tslint:recommended",
  "rules": {
    "interface-name": [true, "never-prefix"],
    "quotemark": [true, "single"],
    "no-bitwise": false,
    "trailing-comma": [
      true,
      {
        "multiline": {
          "objects": "always",
          "arrays": "always",
          "functions": "never",
          "typeLiterals": "ignore"
        },
        "esSpecCompliant": true
      }
    ],
    "object-literal-sort-keys": false,
 		"radix": false,
 		"forin": false
  }
 }