Expose build

This commit is contained in:
Eliott Vincent 2022-10-06 10:38:38 +02:00
parent a169cc58d7
commit fa78cd0885
31 changed files with 5793 additions and 1 deletions

1
.gitignore vendored
View File

@ -3,6 +3,5 @@ testing.js
node_modules
coverage
npm-debug.log
lib
TODO.md
package-lock.json

14
lib/encoding/index.d.ts vendored Normal file
View File

@ -0,0 +1,14 @@
import { Match } from '../match';
/**
 * Contract implemented by the encoding recognisers declared in this package.
 */
export interface Recogniser {
/** Examines the detection context and returns a Match, or null when the input is not recognised. */
match(input: Context): Match | null;
/** Name reported for this recogniser; the context argument is optional. */
name(input?: Context): string;
/** Optional language tag for the recogniser; the method may be absent or return undefined. */
language?(): string | undefined;
}
/**
 * Detection context handed to Recogniser.match(): the bytes under inspection
 * together with their lengths and some derived fields.
 */
export interface Context {
/** NOTE(review): presumably occurrence counts indexed by byte value — no reader is visible here; confirm. */
byteStats: number[];
/** NOTE(review): presumably set when bytes in the C1 range occur in the input — confirm against the producer. */
c1Bytes: boolean;
/** Bytes read by the multi-byte, UTF-8 and UTF-16/32 recognisers. */
rawInput: Uint8Array;
/** Number of bytes of rawInput those recognisers examine. */
rawLen: number;
/** Bytes scanned by the ISO-2022 recognisers. NOTE(review): how this differs from rawInput is not visible here. */
inputBytes: Uint8Array;
/** Number of bytes of inputBytes to scan. */
inputLen: number;
}

3
lib/encoding/index.js Normal file
View File

@ -0,0 +1,3 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=index.js.map

View File

@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/encoding/index.ts"],"names":[],"mappings":""}

23
lib/encoding/iso2022.d.ts vendored Normal file
View File

@ -0,0 +1,23 @@
import { Context, Recogniser } from '.';
import { Match } from '../match';
/**
 * Base recogniser for the ISO-2022 encodings. Not exported itself; the
 * exported ISO_2022_JP, ISO_2022_KR and ISO_2022_CN classes extend it.
 */
declare class ISO_2022 implements Recogniser {
/** Escape sequences looked for by match(), one byte array per sequence. */
escapeSequences: number[][];
name(): string;
/** Returns a Match based on the escape sequences found in the context, or null. */
match(det: Context): Match | null;
}
/** ISO-2022-JP recogniser: supplies its own name, language and escape-sequence table. */
export declare class ISO_2022_JP extends ISO_2022 {
name(): string;
language(): string;
escapeSequences: number[][];
}
/** ISO-2022-KR recogniser: supplies its own name, language and escape-sequence table. */
export declare class ISO_2022_KR extends ISO_2022 {
name(): string;
language(): string;
escapeSequences: number[][];
}
/** ISO-2022-CN recogniser: supplies its own name, language and escape-sequence table. */
export declare class ISO_2022_CN extends ISO_2022 {
name(): string;
language(): string;
escapeSequences: number[][];
}
export {};

114
lib/encoding/iso2022.js Normal file
View File

@ -0,0 +1,114 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ISO_2022_CN = exports.ISO_2022_KR = exports.ISO_2022_JP = void 0;
const match_1 = __importDefault(require("../match"));
/**
 * Shared detector for the ISO-2022 family. Subclasses supply the escape
 * sequences that announce their encoding; the score depends on how many ESC
 * bytes begin a known sequence and how many do not.
 */
class ISO_2022 {
  constructor() {
    // Replaced by subclasses. Index 0 of every entry is never compared: the
    // ESC byte found at the scan position stands in for it.
    this.escapeSequences = [];
  }

  name() {
    return 'ISO_2022';
  }

  /**
   * Scans `det.inputBytes` (first `det.inputLen` bytes) for escape sequences.
   *
   * @param det detection context
   * @returns a match, or null when no known sequence occurs or the score is
   *          not positive
   */
  match(det) {
    const bytes = det.inputBytes;
    const length = det.inputLen;

    let hits = 0; // ESC bytes that start a known sequence
    let misses = 0; // ESC bytes that do not
    let shifts = 0; // shift-out (0x0e) / shift-in (0x0f) bytes

    // True when `seq`, ignoring its first element, occurs at offset `at`.
    const occursAt = (seq, at) => {
      if (length - at < seq.length) {
        return false;
      }
      for (let k = 1; k < seq.length; k++) {
        if (seq[k] != bytes[at + k]) {
          return false;
        }
      }
      return true;
    };

    let pos = 0;
    while (pos < length) {
      const current = bytes[pos];
      if (current == 0x1b) {
        const found = this.escapeSequences.find((seq) => occursAt(seq, pos));
        if (found !== undefined) {
          hits++;
          // Resume right after the matched sequence.
          pos += found.length;
          continue;
        }
        misses++;
      }
      if (current == 0x0e || current == 0x0f) {
        shifts++;
      }
      pos++;
    }

    if (hits == 0) {
      return null;
    }

    // 100 when every ESC was recognised, falling as unrecognised ones appear.
    let confidence = (100 * hits - 100 * misses) / (hits + misses);

    // With fewer than five pieces of evidence, deduct 10 per missing piece.
    const evidence = hits + shifts;
    if (evidence < 5) {
      confidence -= (5 - evidence) * 10;
    }

    return confidence <= 0 ? null : (0, match_1.default)(det, this, confidence);
  }
}
/**
 * ISO-2022-JP recogniser (language 'ja'): the base-class scan driven by the
 * escape sequences listed here.
 */
class ISO_2022_JP extends ISO_2022 {
  constructor() {
    super(...arguments);
    const ESC = 0x1b;
    // Trailing comments spell each sequence out as ASCII.
    this.escapeSequences = [
      [ESC, 0x24, 0x28, 0x43], // ESC $ ( C
      [ESC, 0x24, 0x28, 0x44], // ESC $ ( D
      [ESC, 0x24, 0x40], // ESC $ @
      [ESC, 0x24, 0x41], // ESC $ A
      [ESC, 0x24, 0x42], // ESC $ B
      [ESC, 0x26, 0x40], // ESC & @
      [ESC, 0x28, 0x42], // ESC ( B
      [ESC, 0x28, 0x48], // ESC ( H
      [ESC, 0x28, 0x49], // ESC ( I
      [ESC, 0x28, 0x4a], // ESC ( J
      [ESC, 0x2e, 0x41], // ESC . A
      [ESC, 0x2e, 0x46], // ESC . F
    ];
  }

  name() {
    return 'ISO-2022-JP';
  }

  language() {
    return 'ja';
  }
}
exports.ISO_2022_JP = ISO_2022_JP;
/**
 * ISO-2022-KR recogniser: the base-class scan driven by the single escape
 * sequence ESC $ ) C.
 */
class ISO_2022_KR extends ISO_2022 {
  constructor() {
    super(...arguments);
    this.escapeSequences = [[0x1b, 0x24, 0x29, 0x43]]; // ESC $ ) C
  }

  name() {
    return 'ISO-2022-KR';
  }

  /**
   * @returns the ISO 639-1 language code for Korean. This is 'ko' ('kr' is
   *          the country code), matching what the EUC-KR recogniser reports.
   */
  language() {
    return 'ko';
  }
}
exports.ISO_2022_KR = ISO_2022_KR;
/**
 * ISO-2022-CN recogniser (language 'zh'): the base-class scan driven by the
 * escape sequences listed here.
 */
class ISO_2022_CN extends ISO_2022 {
  constructor() {
    super(...arguments);
    const ESC = 0x1b;
    // Trailing comments spell each sequence out as ASCII.
    this.escapeSequences = [
      [ESC, 0x24, 0x29, 0x41], // ESC $ ) A
      [ESC, 0x24, 0x29, 0x47], // ESC $ ) G
      [ESC, 0x24, 0x2a, 0x48], // ESC $ * H
      [ESC, 0x24, 0x29, 0x45], // ESC $ ) E
      [ESC, 0x24, 0x2b, 0x49], // ESC $ + I
      [ESC, 0x24, 0x2b, 0x4a], // ESC $ + J
      [ESC, 0x24, 0x2b, 0x4b], // ESC $ + K
      [ESC, 0x24, 0x2b, 0x4c], // ESC $ + L
      [ESC, 0x24, 0x2b, 0x4d], // ESC $ + M
      [ESC, 0x4e], // ESC N
      [ESC, 0x4f], // ESC O
    ];
  }

  name() {
    return 'ISO-2022-CN';
  }

  language() {
    return 'zh';
  }
}
exports.ISO_2022_CN = ISO_2022_CN;
//# sourceMappingURL=iso2022.js.map

View File

@ -0,0 +1 @@
{"version":3,"file":"iso2022.js","sourceRoot":"","sources":["../../src/encoding/iso2022.ts"],"names":[],"mappings":";;;;;;AACA,qDAAwC;AAQxC,MAAM,QAAQ;IAAd;QACE,oBAAe,GAAe,EAAE,CAAC;IA0EnC,CAAC;IAxEC,IAAI;QACF,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,GAAY;QAchB,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,IAAI,CAAC;QACT,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,UAAU,CAAC;QAGf,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC;QAC5B,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC;QAE7B,SAAS,EAAE,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE;YACvC,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE;gBACnB,YAAY,EAAE,KACZ,IAAI,GAAG,CAAC,EACR,IAAI,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAClC,IAAI,EAAE,EACN;oBACA,MAAM,GAAG,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;oBAEvC,IAAI,OAAO,GAAG,CAAC,GAAG,GAAG,CAAC,MAAM;wBAAE,SAAS,YAAY,CAAC;oBAEpD,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;wBAC7B,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;4BAAE,SAAS,YAAY,CAAC;oBAEnD,IAAI,EAAE,CAAC;oBACP,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;oBACpB,SAAS,SAAS,CAAC;iBACpB;gBAED,MAAM,EAAE,CAAC;aACV;YAGD,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI;gBAAE,MAAM,EAAE,CAAC;SAClD;QAED,IAAI,IAAI,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QAQ3B,UAAU,GAAG,CAAC,GAAG,GAAG,IAAI,GAAG,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC;QAK3D,IAAI,IAAI,GAAG,MAAM,GAAG,CAAC;YAAE,UAAU,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC;QAEhE,OAAO,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAA,eAAK,EAAC,GAAG,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IAC/D,CAAC;CACF;AAED,MAAa,WAAY,SAAQ,QAAQ;IAAzC;;QASE,oBAAe,GAAG;YAChB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC
;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YAClB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;SACnB,CAAC;IACJ,CAAC;IAtBC,IAAI;QACF,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC;IACd,CAAC;CAgBF;AAvBD,kCAuBC;AAED,MAAa,WAAY,SAAQ,QAAQ;IAAzC;;QAOE,oBAAe,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;IAPC,IAAI;QACF,OAAO,aAAa,CAAC;IACvB,CAAC;IACD,QAAQ;QACN,OAAO,IAAI,CAAC;IACd,CAAC;CAEF;AARD,kCAQC;AAED,MAAa,WAAY,SAAQ,QAAQ;IAAzC;;QAOE,oBAAe,GAAG;YAChB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACxB,CAAC,IAAI,EAAE,IAAI,CAAC;YACZ,CAAC,IAAI,EAAE,IAAI,CAAC;SACb,CAAC;IACJ,CAAC;IAnBC,IAAI;QACF,OAAO,aAAa,CAAC;IACvB,CAAC;IACD,QAAQ;QACN,OAAO,IAAI,CAAC;IACd,CAAC;CAcF;AApBD,kCAoBC"}

50
lib/encoding/mbcs.d.ts vendored Normal file
View File

@ -0,0 +1,50 @@
import { Context, Recogniser } from '.';
import { Match } from '../match';
/**
 * Cursor used by the multi-byte recognisers to walk the raw input one
 * character at a time.
 */
declare class IteratedChar {
/** Value of the most recently decoded character (its bytes packed big-endian). */
charValue: number;
/** Offset at which the current character starts. */
index: number;
/** Offset of the next byte nextByte() will read. */
nextIndex: number;
/** Set when the current character is not valid in the encoding being tested. */
error: boolean;
/** Set once the end of the input has been reached. */
done: boolean;
constructor();
/** Rewinds the cursor to the start of the input and clears the flags. */
reset(): void;
/** Returns the next raw byte (0-255), or -1 at end of input. */
nextByte(det: Context): number;
}
/**
 * Base class of the multi-byte recognisers. Not exported; subclasses supply
 * nextChar() and, optionally, a table of common characters.
 */
declare class mbcs implements Recogniser {
/** Character values considered common in the encoding; searched with a binary search by match(). */
commonChars: number[];
name(): string;
/** Decodes the input with nextChar() and returns a Match, or null. */
match(det: Context): Match | null;
/** Decodes one character into the iterator; returns false when the input is exhausted. */
nextChar(_iter: IteratedChar, _det: Context): boolean;
}
/** Shift_JIS recogniser: own name, language, common-character table and decoder. */
export declare class sjis extends mbcs {
name(): string;
language(): string;
commonChars: number[];
nextChar(iter: IteratedChar, det: Context): boolean;
}
/** Big5 recogniser: own name, language, common-character table and decoder. */
export declare class big5 extends mbcs {
name(): string;
language(): string;
commonChars: number[];
nextChar(iter: IteratedChar, det: Context): boolean;
}
/** Decoder shared by the euc_jp and euc_kr recognisers, which expose it as their nextChar property. */
declare function eucNextChar(iter: IteratedChar, det: Context): boolean;
/** EUC-JP recogniser: own name, language and common-character table; decoding is delegated to eucNextChar. */
export declare class euc_jp extends mbcs {
name(): string;
language(): string;
commonChars: number[];
nextChar: typeof eucNextChar;
}
/** EUC-KR recogniser: own name, language and common-character table; decoding is delegated to eucNextChar. */
export declare class euc_kr extends mbcs {
name(): string;
language(): string;
commonChars: number[];
nextChar: typeof eucNextChar;
}
/** GB18030 recogniser: own name, language, decoder and common-character table. */
export declare class gb_18030 extends mbcs {
name(): string;
language(): string;
nextChar(iter: IteratedChar, det: Context): boolean;
commonChars: number[];
}
export {};

746
lib/encoding/mbcs.js Normal file
View File

@ -0,0 +1,746 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.gb_18030 = exports.euc_kr = exports.euc_jp = exports.big5 = exports.sjis = void 0;
const match_1 = __importDefault(require("../match"));
/**
 * Binary search over an ascending array.
 *
 * @param arr sorted array to search
 * @param searchValue value to look for
 * @returns index of an element equal to `searchValue`, or -1 when absent
 */
function binarySearch(arr, searchValue) {
  let low = 0;
  let high = arr.length - 1;

  while (low <= high) {
    const mid = (low + high) >>> 1;
    const probe = arr[mid];

    if (searchValue > probe) {
      low = mid + 1;
    } else if (searchValue < probe) {
      high = mid - 1;
    } else {
      return mid;
    }
  }

  return -1;
}
/**
 * Cursor over the raw input used while decoding multi-byte characters.
 */
class IteratedChar {
  constructor() {
    // Same state as reset(), except that `index` starts at 0 instead of -1.
    Object.assign(this, {
      charValue: 0,
      index: 0,
      nextIndex: 0,
      error: false,
      done: false,
    });
  }

  /** Rewinds to the start of the input and clears the error/done flags. */
  reset() {
    Object.assign(this, {
      charValue: 0,
      index: -1,
      nextIndex: 0,
      error: false,
      done: false,
    });
  }

  /**
   * Reads one byte of `det.rawInput` and advances the cursor.
   *
   * @param det detection context; reads `rawInput` and `rawLen`
   * @returns the byte (0-255), or -1 once `rawLen` bytes have been consumed,
   *          in which case `done` is set
   */
  nextByte(det) {
    const exhausted = this.nextIndex >= det.rawLen;
    if (!exhausted) {
      const at = this.nextIndex;
      this.nextIndex = at + 1;
      return det.rawInput[at] & 0x00ff;
    }
    this.done = true;
    return -1;
  }
}
/**
 * Base recogniser for multi-byte character sets. Subclasses supply
 * `nextChar()` (the decoder) and usually a sorted `commonChars` table; this
 * class turns the decoded character statistics into a confidence value.
 */
class mbcs {
  constructor() {
    // Values regarded as common in the encoding. Must be sorted ascending
    // because match() looks values up with a binary search.
    this.commonChars = [];
  }

  name() {
    return 'mbcs';
  }

  /**
   * Decodes `det` with `this.nextChar()` and scores the result.
   *
   * @param det detection context (consumed through the iterator)
   * @returns a match, or null when the confidence is 0
   */
  match(det) {
    let doubleByteCharCount = 0;
    let commonCharCount = 0;
    let badCharCount = 0;
    let totalCharCount = 0;

    const iter = new IteratedChar();
    for (iter.reset(); this.nextChar(iter, det); ) {
      totalCharCount++;
      if (iter.error) {
        badCharCount++;
      } else {
        // charValue is packed with 32-bit shifts, so a 4-byte character whose
        // lead byte is >= 0x80 is negative as a signed int. Read it as
        // unsigned so it is still counted as a multi-byte character.
        const cv = iter.charValue >>> 0;
        if (cv > 0xff) {
          doubleByteCharCount++;
          if (
            this.commonChars != null &&
            binarySearch(this.commonChars, cv) >= 0
          ) {
            commonCharCount++;
          }
        }
      }

      // Too many undecodable characters: give up early.
      if (badCharCount >= 2 && badCharCount * 5 >= doubleByteCharCount) {
        return null;
      }
    }

    if (doubleByteCharCount <= 10 && badCharCount == 0) {
      // Little or no multi-byte data. Short, purely single-byte input gets no
      // match at all; anything else gets a token confidence of 10.
      if (doubleByteCharCount == 0 && totalCharCount < 10) {
        return null;
      }
      return (0, match_1.default)(det, this, 10);
    }

    // Require at least 20 valid multi-byte characters per bad one.
    if (doubleByteCharCount < 20 * badCharCount) {
      return null;
    }

    let confidence;
    if (this.commonChars == null) {
      // No frequency data: judge by the amount of decodable multi-byte data.
      confidence = Math.min(30 + doubleByteCharCount - 20 * badCharCount, 100);
    } else {
      // Log-scale the common-character count against the multi-byte count.
      const maxVal = Math.log(doubleByteCharCount / 4);
      const scaleFactor = 90.0 / maxVal;
      confidence = Math.min(
        Math.floor(Math.log(commonCharCount + 1) * scaleFactor + 10),
        100
      );
    }

    return confidence == 0 ? null : (0, match_1.default)(det, this, confidence);
  }

  /**
   * Placeholder decoder, meant to be overridden. It always returns true and
   * never advances the iterator, so match() does not terminate when called on
   * the base class itself.
   */
  nextChar(_iter, _det) {
    return true;
  }
}
/**
 * Shift_JIS recogniser (language 'ja').
 */
class sjis extends mbcs {
  constructor() {
    super(...arguments);
    // Kept in ascending order: the base class binary-searches this table.
    this.commonChars = [
      0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175,
      0x8176, 0x82a0, 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad,
      0x82af, 0x82b1, 0x82b3, 0x82b5, 0x82b7, 0x82bd, 0x82be, 0x82c1,
      0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, 0x82cd, 0x82dc,
      0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
      0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375,
      0x8376, 0x8389, 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa,
      0x95aa,
    ];
  }

  name() {
    return 'Shift_JIS';
  }

  language() {
    return 'ja';
  }

  /**
   * Decodes the next character into `iter`.
   *
   * @returns false when the input ends (including in the middle of a two-byte
   *          character), true otherwise; `iter.error` flags a bad trail byte
   */
  nextChar(iter, det) {
    iter.index = iter.nextIndex;
    iter.error = false;

    const lead = iter.nextByte(det);
    iter.charValue = lead;
    if (lead < 0) {
      return false;
    }

    // 0x00-0x7f and 0xa1-0xdf stand alone as single-byte characters.
    const isSingleByte = lead <= 0x7f || (lead > 0xa0 && lead <= 0xdf);
    if (isSingleByte) {
      return true;
    }

    const trail = iter.nextByte(det);
    if (trail < 0) {
      return false;
    }
    iter.charValue = (lead << 8) | trail;

    // The trail byte must lie in 0x40-0x7f or 0x80-0xff.
    const outsideLowRange = trail < 0x40 || trail > 0x7f;
    const outsideHighRange = trail < 0x80 || trail > 0xff;
    if (outsideLowRange && outsideHighRange) {
      iter.error = true;
    }
    return true;
  }
}
exports.sjis = sjis;
/**
 * Big5 recogniser (language 'zh').
 */
class big5 extends mbcs {
  constructor() {
    super(...arguments);
    // Kept in ascending order: the base class binary-searches this table.
    this.commonChars = [
      0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176,
      0xa440, 0xa446, 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464,
      0xa46a, 0xa46c, 0xa477, 0xa4a3, 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce,
      0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, 0xa558, 0xa569,
      0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
      0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751,
      0xa759, 0xa7da, 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4,
      0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, 0xaa6b, 0xaaba, 0xaabe, 0xaacc,
      0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, 0xaec9, 0xafe0,
      0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
      0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944,
      0xbad8, 0xbb44, 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f,
    ];
  }

  name() {
    return 'Big5';
  }

  language() {
    return 'zh';
  }

  /**
   * Decodes the next character into `iter`.
   *
   * @returns false when the input ends (including in the middle of a two-byte
   *          character), true otherwise; `iter.error` flags a bad trail byte
   */
  nextChar(iter, det) {
    iter.index = iter.nextIndex;
    iter.error = false;

    const lead = iter.nextByte(det);
    iter.charValue = lead;
    if (lead < 0) {
      return false;
    }

    // 0x00-0x7f and 0xff are treated as single-byte characters.
    if (lead <= 0x7f || lead == 0xff) {
      return true;
    }

    const trail = iter.nextByte(det);
    if (trail < 0) {
      return false;
    }
    iter.charValue = (iter.charValue << 8) | trail;

    // Trail bytes below 0x40, and the values 0x7f and 0xff, are invalid.
    const trailIsValid = !(trail < 0x40 || trail == 0x7f || trail == 0xff);
    if (!trailIsValid) {
      iter.error = true;
    }
    return true;
  }
}
exports.big5 = big5;
/**
 * Decodes the next EUC character into `iter`; shared by the EUC-JP and EUC-KR
 * recognisers.
 *
 * Lead bytes up to 0x8d are single-byte characters. Every other lead byte
 * consumes a second byte, and lead byte 0x8f consumes a third as well.
 * `iter.error` is set when a byte that must be >= 0xa1 is not.
 *
 * @param iter iterator updated in place (`index`, `charValue`, `error`, `done`)
 * @param det detection context passed through to `iter.nextByte()`
 * @returns true while input remains, false once `iter.done` is set
 */
function eucNextChar(iter, det) {
  iter.index = iter.nextIndex;
  iter.error = false;

  const lead = (iter.charValue = iter.nextByte(det));

  if (lead < 0) {
    // Ran off the end of the input.
    iter.done = true;
  } else if (lead > 0x8d) {
    const second = iter.nextByte(det);
    iter.charValue = (iter.charValue << 8) | second;

    const isTwoByteLead = (lead >= 0xa1 && lead <= 0xfe) || lead == 0x8e;
    if (isTwoByteLead) {
      if (second < 0xa1) {
        iter.error = true;
      }
    } else if (lead == 0x8f) {
      // Three-byte form: only the third byte is range-checked.
      const third = iter.nextByte(det);
      iter.charValue = (iter.charValue << 8) | third;
      if (third < 0xa1) {
        iter.error = true;
      }
    }
  }

  return iter.done == false;
}
/**
 * EUC-JP recogniser (language 'ja'). Decoding is delegated to eucNextChar.
 */
class euc_jp extends mbcs {
  constructor() {
    super(...arguments);
    // Kept in ascending order: the base class binary-searches this table.
    this.commonChars = [
      0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6,
      0xa1d7, 0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac,
      0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb,
      0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, 0xa4c6, 0xa4c7,
      0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
      0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb,
      0xa4ec, 0xa4ef, 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6,
      0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7,
      0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, 0xa5c8, 0xa5c9,
      0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
      0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9,
      0xb9d4, 0xbaee, 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd,
      0xcab8, 0xcaf3, 0xcbdc, 0xcdd1,
    ];
    // Installed as an own property, which shadows the base-class nextChar().
    this.nextChar = eucNextChar;
  }

  name() {
    return 'EUC-JP';
  }

  language() {
    return 'ja';
  }
}
exports.euc_jp = euc_jp;
/**
 * EUC-KR recogniser (language 'ko'). Decoding is delegated to eucNextChar.
 */
class euc_kr extends mbcs {
  constructor() {
    super(...arguments);
    // Kept in ascending order: the base class binary-searches this table.
    this.commonChars = [
      0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8,
      0xb0fa, 0xb0fc, 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa,
      0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf,
      0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, 0xb8a6, 0xb8ae,
      0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
      0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd,
      0xbcad, 0xbcba, 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5,
      0xbec6, 0xbec8, 0xbedf, 0xbeee, 0xbef8, 0xbefa, 0xbfa1, 0xbfa9,
      0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, 0xc0af, 0xc0b8,
      0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
      0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6,
      0xc1df, 0xc1f6, 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1,
      0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad,
    ];
    // Installed as an own property, which shadows the base-class nextChar().
    this.nextChar = eucNextChar;
  }

  name() {
    return 'EUC-KR';
  }

  language() {
    return 'ko';
  }
}
exports.euc_kr = euc_kr;
/**
 * GB18030 recogniser (language 'zh').
 */
class gb_18030 extends mbcs {
  constructor() {
    super(...arguments);
    // Kept in ascending order: the base class binary-searches this table.
    this.commonChars = [
      0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3,
      0xa3a1, 0xa3ac, 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9,
      0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0,
      0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, 0xb8df, 0xb8f6,
      0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
      0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd,
      0xbfb4, 0xbfc6, 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7,
      0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5,
      0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, 0xcad0, 0xcad6,
      0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
      0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7,
      0xd2aa, 0xd2b2, 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd,
      0xd4c2, 0xd4da, 0xd5e2, 0xd6d0,
    ];
  }

  name() {
    return 'GB18030';
  }

  language() {
    return 'zh';
  }

  /**
   * Decodes the next character into `iter`.
   *
   * Accepted forms:
   *   - one byte:   00-80
   *   - two bytes:  81-fe, then 40-7e or 80-fe
   *   - four bytes: 81-fe, 30-39, 81-fe, 30-39
   * Any other sequence starting with 81-fe sets `iter.error`.
   *
   * @returns true while input remains, false once `iter.done` is set
   */
  nextChar(iter, det) {
    iter.index = iter.nextIndex;
    iter.error = false;
    let firstByte = 0;
    let secondByte = 0;
    let thirdByte = 0;
    let fourthByte = 0;

    buildChar: {
      firstByte = iter.charValue = iter.nextByte(det);
      if (firstByte < 0) {
        // Ran off the end of the input.
        iter.done = true;
        break buildChar;
      }
      if (firstByte <= 0x80) {
        // Single-byte character.
        break buildChar;
      }

      secondByte = iter.nextByte(det);
      iter.charValue = (iter.charValue << 8) | secondByte;

      if (firstByte >= 0x81 && firstByte <= 0xfe) {
        // Two-byte character. The second range starts at 0x80 (hex), so the
        // invalid trail byte 0x7f is rejected.
        if (
          (secondByte >= 0x40 && secondByte <= 0x7e) ||
          (secondByte >= 0x80 && secondByte <= 0xfe)
        ) {
          break buildChar;
        }

        // Four-byte character.
        if (secondByte >= 0x30 && secondByte <= 0x39) {
          thirdByte = iter.nextByte(det);
          if (thirdByte >= 0x81 && thirdByte <= 0xfe) {
            fourthByte = iter.nextByte(det);
            if (fourthByte >= 0x30 && fourthByte <= 0x39) {
              iter.charValue =
                (iter.charValue << 16) | (thirdByte << 8) | fourthByte;
              break buildChar;
            }
          }
        }

        // Neither a valid two-byte nor a valid four-byte sequence.
        iter.error = true;
        break buildChar;
      }
    }

    return iter.done == false;
  }
}
exports.gb_18030 = gb_18030;
//# sourceMappingURL=mbcs.js.map

1
lib/encoding/mbcs.js.map Normal file

File diff suppressed because one or more lines are too long

75
lib/encoding/sbcs.d.ts vendored Normal file
View File

@ -0,0 +1,75 @@
import { Context, Recogniser } from '../encoding/index';
import { Match } from '../match';
/** Pairs a language tag with the n-gram list associated with it. */
declare class NGramsPlusLang {
fLang: string;
fNGrams: number[];
/** NOTE(review): presumably `la` populates fLang and `ng` populates fNGrams — implementation not visible here. */
constructor(la: string, ng: number[]);
}
/**
 * Base class of the single-byte recognisers declared in this file. Not exported.
 * NOTE(review): the implementation (lib/encoding/sbcs.js) is not shown in this view.
 */
declare class sbcs implements Recogniser {
spaceChar: number;
private nGramLang?;
/** Returns either per-language n-gram sets or one flat n-gram list, depending on the subclass. */
ngrams(): NGramsPlusLang[] | number[];
byteMap(): number[];
name(_input: Context): string;
language(): string | undefined;
match(det: Context): Match | null;
}
/** Single-byte recogniser whose ngrams() returns per-language n-gram sets and whose name() takes the detection context. */
export declare class ISO_8859_1 extends sbcs {
byteMap(): number[];
ngrams(): NGramsPlusLang[];
name(input: Context): string;
}
/** Single-byte recogniser whose ngrams() returns per-language n-gram sets and whose name() takes the detection context. */
export declare class ISO_8859_2 extends sbcs {
byteMap(): number[];
ngrams(): NGramsPlusLang[];
name(det: Context): string;
}
/** Single-byte recogniser with one flat n-gram list, a fixed name and a fixed language. */
export declare class ISO_8859_5 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list, a fixed name and a fixed language. */
export declare class ISO_8859_6 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list; name() takes the detection context. */
export declare class ISO_8859_7 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(det: Context): string;
language(): string;
}
/** Single-byte recogniser whose ngrams() returns per-language n-gram sets; name() takes the detection context. */
export declare class ISO_8859_8 extends sbcs {
byteMap(): number[];
ngrams(): NGramsPlusLang[];
name(det: Context): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list; name() takes the detection context. */
export declare class ISO_8859_9 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(det: Context): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list, a fixed name and a fixed language. */
export declare class windows_1251 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list, a fixed name and a fixed language. */
export declare class windows_1256 extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(): string;
language(): string;
}
/** Single-byte recogniser with one flat n-gram list, a fixed name and a fixed language. */
export declare class KOI8_R extends sbcs {
byteMap(): number[];
ngrams(): number[];
name(): string;
language(): string;
}
export {};

4346
lib/encoding/sbcs.js Normal file

File diff suppressed because it is too large Load Diff

1
lib/encoding/sbcs.js.map Normal file

File diff suppressed because one or more lines are too long

27
lib/encoding/unicode.d.ts vendored Normal file
View File

@ -0,0 +1,27 @@
import { Context, Recogniser } from '.';
import { Match } from '../match';
/** UTF-16BE recogniser; match() succeeds only when the input starts with the byte-order mark FE FF. */
export declare class UTF_16BE implements Recogniser {
name(): string;
match(det: Context): Match | null;
}
/**
 * UTF-16LE recogniser; match() succeeds only when the input starts with the
 * byte-order mark FF FE and the following two bytes are not both zero.
 */
export declare class UTF_16LE implements Recogniser {
name(): string;
match(det: Context): Match | null;
}
/** Implemented by the UTF-32 recognisers: decodes the 32-bit unit starting at `index`. */
interface WithGetChar {
getChar(input: Uint8Array, index: number): number;
}
/**
 * Base class of the UTF-32 recognisers. Not exported; UTF_32BE and UTF_32LE
 * extend it and provide the byte-order-specific getChar().
 */
declare class UTF_32 implements Recogniser, WithGetChar {
name(): string;
/** Base implementation returns -1; subclasses decode four bytes starting at the index. */
getChar(_input: Uint8Array, _index: number): number;
match(det: Context): Match | null;
}
/** UTF-32BE recogniser: getChar() reads the four bytes most-significant first. */
export declare class UTF_32BE extends UTF_32 {
name(): string;
getChar(input: Uint8Array, index: number): number;
}
/** UTF-32LE recogniser: getChar() reads the four bytes least-significant first. */
export declare class UTF_32LE extends UTF_32 {
name(): string;
getChar(input: Uint8Array, index: number): number;
}
export {};

109
lib/encoding/unicode.js Normal file
View File

@ -0,0 +1,109 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.UTF_32LE = exports.UTF_32BE = exports.UTF_16LE = exports.UTF_16BE = void 0;
const match_1 = __importDefault(require("../match"));
/**
 * UTF-16BE recogniser. Relies solely on the byte-order mark FE FF.
 */
class UTF_16BE {
  name() {
    return 'UTF-16BE';
  }

  /**
   * @param det detection context; reads `rawInput`
   * @returns a match with confidence 100 when the input starts with FE FF,
   *          otherwise null
   */
  match(det) {
    const bytes = det.rawInput;
    const startsWithBom =
      bytes.length >= 2 &&
      (bytes[0] & 0xff) == 0xfe &&
      (bytes[1] & 0xff) == 0xff;

    return startsWithBom ? (0, match_1.default)(det, this, 100) : null;
  }
}
exports.UTF_16BE = UTF_16BE;
/**
 * UTF-16LE recogniser. Relies solely on the byte-order mark FF FE.
 */
class UTF_16LE {
  name() {
    return 'UTF-16LE';
  }

  /**
   * @param det detection context; reads `rawInput`
   * @returns a match with confidence 100 when the input starts with FF FE and
   *          is not followed by 00 00, otherwise null
   */
  match(det) {
    const bytes = det.rawInput;

    const startsWithBom =
      bytes.length >= 2 &&
      (bytes[0] & 0xff) == 0xff &&
      (bytes[1] & 0xff) == 0xfe;
    if (!startsWithBom) {
      return null;
    }

    // FF FE 00 00 is the UTF-32LE byte-order mark, so it is not claimed here.
    const continuesWithZeros =
      bytes.length >= 4 && bytes[2] == 0x00 && bytes[3] == 0x00;
    if (continuesWithZeros) {
      return null;
    }

    return (0, match_1.default)(det, this, 100);
  }
}
exports.UTF_16LE = UTF_16LE;
/**
 * Base recogniser for UTF-32. Subclasses provide `getChar()` for their byte
 * order; this class counts valid and invalid code points and converts the
 * counts into a confidence value.
 */
class UTF_32 {
  name() {
    return 'UTF-32';
  }

  /**
   * Decodes the 32-bit unit starting at `_index`. The base implementation
   * always returns -1, which match() counts as invalid; subclasses override it.
   */
  getChar(_input, _index) {
    return -1;
  }

  /**
   * @param det detection context; reads `rawInput` and `rawLen`
   * @returns a match, or null when the input holds no complete 4-byte unit or
   *          the confidence is 0
   */
  match(det) {
    const input = det.rawInput;

    // Only whole 4-byte units are examined. `/` is floating-point division in
    // JavaScript, so the quotient must be floored explicitly; otherwise a
    // trailing partial unit would be decoded from bytes past the end.
    const limit = Math.floor(det.rawLen / 4) * 4;
    if (limit == 0) {
      return null;
    }

    const hasBOM = this.getChar(input, 0) == 0x0000feff;

    let numValid = 0;
    let numInvalid = 0;
    for (let i = 0; i < limit; i += 4) {
      const ch = this.getChar(input, i);
      // Invalid: negative (top bit set), at or beyond 0x10ffff, or a surrogate.
      if (ch < 0 || ch >= 0x10ffff || (ch >= 0xd800 && ch <= 0xdfff)) {
        numInvalid += 1;
      } else {
        numValid += 1;
      }
    }

    // A byte-order mark makes a match far more likely; without one, the
    // content has to look like well-formed UTF-32.
    let confidence = 0;
    if (hasBOM && numInvalid == 0) {
      confidence = 100;
    } else if (hasBOM && numValid > numInvalid * 10) {
      confidence = 80;
    } else if (numValid > 3 && numInvalid == 0) {
      confidence = 100;
    } else if (numValid > 0 && numInvalid == 0) {
      confidence = 80;
    } else if (numValid > numInvalid * 10) {
      // Probably corrupt UTF-32 input.
      confidence = 25;
    }

    return confidence == 0 ? null : (0, match_1.default)(det, this, confidence);
  }
}
/**
 * UTF-32 recogniser for big-endian byte order.
 */
class UTF_32BE extends UTF_32 {
  name() {
    return 'UTF-32BE';
  }

  /**
   * Packs the four bytes at `index`..`index + 3`, most significant first, into
   * a signed 32-bit integer.
   */
  getChar(input, index) {
    let value = 0;
    for (let offset = 0; offset <= 3; offset++) {
      value = (value << 8) | (input[index + offset] & 0xff);
    }
    return value;
  }
}
exports.UTF_32BE = UTF_32BE;
/**
 * UTF-32 recogniser for little-endian byte order.
 */
class UTF_32LE extends UTF_32 {
  name() {
    return 'UTF-32LE';
  }

  /**
   * Packs the four bytes at `index`..`index + 3`, least significant first,
   * into a signed 32-bit integer.
   */
  getChar(input, index) {
    let value = 0;
    for (let offset = 3; offset >= 0; offset--) {
      value = (value << 8) | (input[index + offset] & 0xff);
    }
    return value;
  }
}
exports.UTF_32LE = UTF_32LE;
//# sourceMappingURL=unicode.js.map

View File

@ -0,0 +1 @@
{"version":3,"file":"unicode.js","sourceRoot":"","sources":["../../src/encoding/unicode.ts"],"names":[],"mappings":";;;;;;AACA,qDAAwC;AAMxC,MAAa,QAAQ;IACnB,IAAI;QACF,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,GAAY;QAChB,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC;QAE3B,IACE,KAAK,CAAC,MAAM,IAAI,CAAC;YACjB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI;YACzB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,EACzB;YACA,OAAO,IAAA,eAAK,EAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;SAC9B;QAGD,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAnBD,4BAmBC;AAED,MAAa,QAAQ;IACnB,IAAI;QACF,OAAO,UAAU,CAAC;IACpB,CAAC;IACD,KAAK,CAAC,GAAY;QAChB,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC;QAE3B,IACE,KAAK,CAAC,MAAM,IAAI,CAAC;YACjB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI;YACzB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,EACzB;YAEA,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE;gBAE7D,OAAO,IAAI,CAAC;aACb;YACD,OAAO,IAAA,eAAK,EAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;SAC9B;QAGD,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAvBD,4BAuBC;AAMD,MAAM,MAAM;IACV,IAAI;QACF,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,CAAC,MAAkB,EAAE,MAAc;QACxC,OAAO,CAAC,CAAC,CAAC;IACZ,CAAC;IAED,KAAK,CAAC,GAAY;QAChB,IAAI,QAAQ,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,EACd,MAAM,GAAG,KAAK,EACd,UAAU,GAAG,CAAC,CAAC;QACjB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC;QAE3B,IAAI,KAAK,IAAI,CAAC,EAAE;YACd,OAAO,IAAI,CAAC;SACb;QAED,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,UAAU,EAAE;YACxC,MAAM,GAAG,IAAI,CAAC;SACf;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,CAAC,EAAE;YACjC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAElC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,IAAI,QAAQ,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,EAAE;gBAC9D,UAAU,IAAI,CAAC,CAAC;aACjB;iBAAM;gBACL,QAAQ,IAAI,CAAC,CAAC;aACf;SACF;QAID,IAAI,MAAM,IAAI,UAAU,IAAI,CAAC,EAAE;YAC7B,UAAU,GAAG,GAAG,CAAC;SAClB;aAAM,IAAI,MAAM,IAAI,QAAQ,GAAG,UAAU,GAAG,EAAE,EAAE;YAC/C,UAAU,GAAG,EAA
E,CAAC;SACjB;aAAM,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,CAAC,EAAE;YAC1C,UAAU,GAAG,GAAG,CAAC;SAClB;aAAM,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,CAAC,EAAE;YAC1C,UAAU,GAAG,EAAE,CAAC;SACjB;aAAM,IAAI,QAAQ,GAAG,UAAU,GAAG,EAAE,EAAE;YAErC,UAAU,GAAG,EAAE,CAAC;SACjB;QAGD,OAAO,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAA,eAAK,EAAC,GAAG,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IAC/D,CAAC;CACF;AAED,MAAa,QAAS,SAAQ,MAAM;IAClC,IAAI;QACF,OAAO,UAAU,CAAC;IACpB,CAAC;IACD,OAAO,CAAC,KAAiB,EAAE,KAAa;QACtC,OAAO,CACL,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACjC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACjC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YAChC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAC1B,CAAC;IACJ,CAAC;CACF;AAZD,4BAYC;AAED,MAAa,QAAS,SAAQ,MAAM;IAClC,IAAI;QACF,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,CAAC,KAAiB,EAAE,KAAa;QACtC,OAAO,CACL,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACjC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YACjC,CAAC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YAChC,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAC1B,CAAC;IACJ,CAAC;CACF;AAbD,4BAaC"}

6
lib/encoding/utf8.d.ts vendored Normal file
View File

@ -0,0 +1,6 @@
import { Context, Recogniser } from '.';
import { Match } from '../match';
/** UTF-8 recogniser; match() scores the input by counting well-formed and malformed multi-byte sequences. */
export default class Utf8 implements Recogniser {
name(): string;
match(det: Context): Match | null;
}

72
lib/encoding/utf8.js Normal file
View File

@ -0,0 +1,72 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const match_1 = __importDefault(require("../match"));
class Utf8 {
name() {
return 'UTF-8';
}
match(det) {
let hasBOM = false, numValid = 0, numInvalid = 0, trailBytes = 0, confidence;
const input = det.rawInput;
if (det.rawLen >= 3 &&
(input[0] & 0xff) == 0xef &&
(input[1] & 0xff) == 0xbb &&
(input[2] & 0xff) == 0xbf) {
hasBOM = true;
}
for (let i = 0; i < det.rawLen; i++) {
const b = input[i];
if ((b & 0x80) == 0)
continue;
if ((b & 0x0e0) == 0x0c0) {
trailBytes = 1;
}
else if ((b & 0x0f0) == 0x0e0) {
trailBytes = 2;
}
else if ((b & 0x0f8) == 0xf0) {
trailBytes = 3;
}
else {
numInvalid++;
if (numInvalid > 5)
break;
trailBytes = 0;
}
for (;;) {
i++;
if (i >= det.rawLen)
break;
if ((input[i] & 0xc0) != 0x080) {
numInvalid++;
break;
}
if (--trailBytes == 0) {
numValid++;
break;
}
}
}
confidence = 0;
if (hasBOM && numInvalid == 0)
confidence = 100;
else if (hasBOM && numValid > numInvalid * 10)
confidence = 80;
else if (numValid > 3 && numInvalid == 0)
confidence = 100;
else if (numValid > 0 && numInvalid == 0)
confidence = 80;
else if (numValid == 0 && numInvalid == 0)
confidence = 10;
else if (numValid > numInvalid * 10)
confidence = 25;
else
return null;
return (0, match_1.default)(det, this, confidence);
}
}
exports.default = Utf8;
//# sourceMappingURL=utf8.js.map

1
lib/encoding/utf8.js.map Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"utf8.js","sourceRoot":"","sources":["../../src/encoding/utf8.ts"],"names":[],"mappings":";;;;;AACA,qDAAwC;AAExC,MAAqB,IAAI;IACvB,IAAI;QACF,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,GAAY;QAChB,IAAI,MAAM,GAAG,KAAK,EAChB,QAAQ,GAAG,CAAC,EACZ,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,EACd,UAAU,CAAC;QACb,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC;QAE3B,IACE,GAAG,CAAC,MAAM,IAAI,CAAC;YACf,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI;YACzB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI;YACzB,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,EACzB;YACA,MAAM,GAAG,IAAI,CAAC;SACf;QAGD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACnC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;gBAAE,SAAS;YAG9B,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,KAAK,EAAE;gBACxB,UAAU,GAAG,CAAC,CAAC;aAChB;iBAAM,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,KAAK,EAAE;gBAC/B,UAAU,GAAG,CAAC,CAAC;aAChB;iBAAM,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,IAAI,EAAE;gBAC9B,UAAU,GAAG,CAAC,CAAC;aAChB;iBAAM;gBACL,UAAU,EAAE,CAAC;gBACb,IAAI,UAAU,GAAG,CAAC;oBAAE,MAAM;gBAC1B,UAAU,GAAG,CAAC,CAAC;aAChB;YAGD,SAAS;gBACP,CAAC,EAAE,CAAC;gBACJ,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM;oBAAE,MAAM;gBAE3B,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,KAAK,EAAE;oBAC9B,UAAU,EAAE,CAAC;oBACb,MAAM;iBACP;gBACD,IAAI,EAAE,UAAU,IAAI,CAAC,EAAE;oBACrB,QAAQ,EAAE,CAAC;oBACX,MAAM;iBACP;aACF;SACF;QAID,UAAU,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,IAAI,UAAU,IAAI,CAAC;YAAE,UAAU,GAAG,GAAG,CAAC;aAC3C,IAAI,MAAM,IAAI,QAAQ,GAAG,UAAU,GAAG,EAAE;YAAE,UAAU,GAAG,EAAE,CAAC;aAC1D,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,CAAC;YAAE,UAAU,GAAG,GAAG,CAAC;aACtD,IAAI,QAAQ,GAAG,CAAC,IAAI,UAAU,IAAI,CAAC;YAAE,UAAU,GAAG,EAAE,CAAC;aACrD,IAAI,QAAQ,IAAI,CAAC,IAAI,UAAU,IAAI,CAAC;YAEvC,UAAU,GAAG,EAAE,CAAC;aACb,IAAI,QAAQ,GAAG,UAAU,GAAG,EAAE;YAEjC,UAAU,GAAG,EAAE,CAAC;;YACb,OAAO,IAAI,CAAC;QAEjB,OAAO,IAAA,eAAK,EAAC,GAAG,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC;IACtC,CAAC;CACF;AAzED,uBAyEC"}

2
lib/fs/browser.d.ts vendored Normal file
View File

@ -0,0 +1,2 @@
/** File-system accessor for environments without one: typed as never returning. */
declare const _default: () => never;
export default _default;

6
lib/fs/browser.js Normal file
View File

@ -0,0 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
// File-system accessor for lib/fs/browser.js: no file system is available
// there, so every call throws.
exports.default = () => {
throw new Error('File system is not available');
};
//# sourceMappingURL=browser.js.map

1
lib/fs/browser.js.map Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../../src/fs/browser.ts"],"names":[],"mappings":";;AAAA,kBAAe,GAAG,EAAE;IAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;AAClD,CAAC,CAAC"}

2
lib/fs/node.d.ts vendored Normal file
View File

@ -0,0 +1,2 @@
/** File-system accessor for lib/fs/node.js; the returned module is untyped (any). */
declare const _default: () => any;
export default _default;

11
lib/fs/node.js Normal file
View File

@ -0,0 +1,11 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
let fsModule;
exports.default = () => {
if (typeof module === 'object' && typeof module.exports === 'object') {
fsModule = fsModule ? fsModule : require('fs');
return fsModule;
}
throw new Error('File system is not available');
};
//# sourceMappingURL=node.js.map

1
lib/fs/node.js.map Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"node.js","sourceRoot":"","sources":["../../src/fs/node.ts"],"names":[],"mappings":";;AAAA,IAAI,QAAa,CAAC;AAElB,kBAAe,GAAG,EAAE;IAClB,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,EAAE;QACpE,QAAQ,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,QAAQ,CAAC;KACjB;IACD,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;AAClD,CAAC,CAAC"}

18
lib/index.d.ts vendored Normal file
View File

@ -0,0 +1,18 @@
import { Match } from './match';
/** Sampling options read by `detectFile` and `detectFileSync`. */
interface FullOptions {
/** Number of bytes to read from the file; when set, a sample is read instead of the whole file. */
sampleSize: number;
/** Value forwarded as the `position` argument of the underlying `fs.read` / `fs.readSync` call. */
position: number;
}
/** Caller-facing options: every field of `FullOptions` is optional. */
declare type Options = Partial<FullOptions>;
/**
 * Result type declared for the file-based detectors.
 * NOTE(review): the compiled implementation only returns what `detect` returns
 * (`string | null`); the `Match[]` member looks unused — confirm against the TypeScript source.
 */
declare type DetectResult = Match[] | string | null;
/** Returns the name of the first (highest-confidence) match for `buffer`, or `null` when nothing matched. */
export declare const detect: (buffer: Uint8Array) => string | null;
/** Runs every recogniser over `buffer` and returns the non-null matches sorted by descending confidence. */
export declare const analyse: (buffer: Uint8Array) => Match[];
/** Reads the file (or a sample of it when `opts.sampleSize` is set) asynchronously and runs `detect` on the bytes. */
export declare const detectFile: (filepath: string, opts?: Options) => Promise<DetectResult>;
/** Synchronous counterpart of `detectFile`. */
export declare const detectFileSync: (filepath: string, opts?: Options) => DetectResult;
/** Default export: an object bundling the same four functions that are also exported by name. */
declare const _default: {
analyse: (buffer: Uint8Array) => Match[];
detect: (buffer: Uint8Array) => string | null;
detectFileSync: (filepath: string, opts?: Partial<FullOptions>) => DetectResult;
detectFile: (filepath: string, opts?: Partial<FullOptions>) => Promise<DetectResult>;
};
export default _default;

143
lib/index.js Normal file
View File

@ -0,0 +1,143 @@
"use strict";
// Compiler-emitted interop helper: exposes property `k` of module `m` on object `o`
// under the name `k2` (which defaults to `k`). An existing `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with a forwarding getter when there is none, when it is an accessor
// on a module not flagged `__esModule`, or when it is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when `Object.create` is unavailable: copy the current value by plain assignment.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Compiler-emitted interop helper: stores `v` as the `default` property of `o`,
// via `Object.defineProperty` (enumerable) when `Object.create` exists, otherwise by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Compiler-emitted interop helper behind `import * as x`: a module flagged `__esModule`
// is returned unchanged; anything else is wrapped in a fresh namespace-like object.
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own property except "default" onto the wrapper...
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
// ...then expose the original module value itself as `default`.
__setModuleDefault(result, mod);
return result;
};
// Compiler-emitted interop helper behind default imports: a module flagged `__esModule`
// is returned as is; any other value is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.detectFileSync = exports.detectFile = exports.analyse = exports.detect = void 0;
const node_1 = __importDefault(require("./fs/node"));
const utf8_1 = __importDefault(require("./encoding/utf8"));
const unicode = __importStar(require("./encoding/unicode"));
const mbcs = __importStar(require("./encoding/mbcs"));
const sbcs = __importStar(require("./encoding/sbcs"));
const iso2022 = __importStar(require("./encoding/iso2022"));
// One instance of every supported recogniser. `analyse` calls `match` on each of them,
// in this order, for every input buffer and then sorts the non-null results by confidence.
const recognisers = [
new utf8_1.default(),
new unicode.UTF_16BE(),
new unicode.UTF_16LE(),
new unicode.UTF_32BE(),
new unicode.UTF_32LE(),
new mbcs.sjis(),
new mbcs.big5(),
new mbcs.euc_jp(),
new mbcs.euc_kr(),
new mbcs.gb_18030(),
new iso2022.ISO_2022_JP(),
new iso2022.ISO_2022_KR(),
new iso2022.ISO_2022_CN(),
new sbcs.ISO_8859_1(),
new sbcs.ISO_8859_2(),
new sbcs.ISO_8859_5(),
new sbcs.ISO_8859_6(),
new sbcs.ISO_8859_7(),
new sbcs.ISO_8859_8(),
new sbcs.ISO_8859_9(),
new sbcs.windows_1251(),
new sbcs.windows_1256(),
new sbcs.KOI8_R(),
];
const detect = (buffer) => {
const matches = (0, exports.analyse)(buffer);
return matches.length > 0 ? matches[0].name : null;
};
exports.detect = detect;
const analyse = (buffer) => {
const byteStats = [];
for (let i = 0; i < 256; i++)
byteStats[i] = 0;
for (let i = buffer.length - 1; i >= 0; i--)
byteStats[buffer[i] & 0x00ff]++;
let c1Bytes = false;
for (let i = 0x80; i <= 0x9f; i += 1) {
if (byteStats[i] !== 0) {
c1Bytes = true;
break;
}
}
const context = {
byteStats,
c1Bytes,
rawInput: buffer,
rawLen: buffer.length,
inputBytes: buffer,
inputLen: buffer.length,
};
const matches = recognisers
.map((rec) => {
return rec.match(context);
})
.filter((match) => {
return !!match;
})
.sort((a, b) => {
return b.confidence - a.confidence;
});
return matches;
};
exports.analyse = analyse;
const detectFile = (filepath, opts = {}) => new Promise((resolve, reject) => {
let fd;
const fs = (0, node_1.default)();
const handler = (err, buffer) => {
if (fd) {
fs.closeSync(fd);
}
if (err) {
reject(err);
}
else {
resolve((0, exports.detect)(buffer));
}
};
if (opts && opts.sampleSize) {
fd = fs.openSync(filepath, 'r');
const sample = Buffer.allocUnsafe(opts.sampleSize);
fs.read(fd, sample, 0, opts.sampleSize, opts.position, (err) => {
handler(err, sample);
});
return;
}
fs.readFile(filepath, handler);
});
exports.detectFile = detectFile;
const detectFileSync = (filepath, opts = {}) => {
const fs = (0, node_1.default)();
if (opts && opts.sampleSize) {
const fd = fs.openSync(filepath, 'r');
const sample = Buffer.allocUnsafe(opts.sampleSize);
fs.readSync(fd, sample, 0, opts.sampleSize, opts.position);
fs.closeSync(fd);
return (0, exports.detect)(sample);
}
return (0, exports.detect)(fs.readFileSync(filepath));
};
exports.detectFileSync = detectFileSync;
// Default export: one object bundling the same four functions that are also exported by name.
exports.default = {
analyse: exports.analyse,
detect: exports.detect,
detectFileSync: exports.detectFileSync,
detectFile: exports.detectFile,
};
//# sourceMappingURL=index.js.map

1
lib/index.js.map Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAGA,qDAA+B;AAE/B,2DAAmC;AACnC,4DAA8C;AAC9C,sDAAwC;AACxC,sDAAwC;AACxC,4DAA8C;AAS9C,MAAM,WAAW,GAAiB;IAChC,IAAI,cAAI,EAAE;IACV,IAAI,OAAO,CAAC,QAAQ,EAAE;IACtB,IAAI,OAAO,CAAC,QAAQ,EAAE;IACtB,IAAI,OAAO,CAAC,QAAQ,EAAE;IACtB,IAAI,OAAO,CAAC,QAAQ,EAAE;IACtB,IAAI,IAAI,CAAC,IAAI,EAAE;IACf,IAAI,IAAI,CAAC,IAAI,EAAE;IACf,IAAI,IAAI,CAAC,MAAM,EAAE;IACjB,IAAI,IAAI,CAAC,MAAM,EAAE;IACjB,IAAI,IAAI,CAAC,QAAQ,EAAE;IACnB,IAAI,OAAO,CAAC,WAAW,EAAE;IACzB,IAAI,OAAO,CAAC,WAAW,EAAE;IACzB,IAAI,OAAO,CAAC,WAAW,EAAE;IACzB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,UAAU,EAAE;IACrB,IAAI,IAAI,CAAC,YAAY,EAAE;IACvB,IAAI,IAAI,CAAC,YAAY,EAAE;IACvB,IAAI,IAAI,CAAC,MAAM,EAAE;CAClB,CAAC;AAIK,MAAM,MAAM,GAAG,CAAC,MAAkB,EAAiB,EAAE;IAC1D,MAAM,OAAO,GAAY,IAAA,eAAO,EAAC,MAAM,CAAC,CAAC;IACzC,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACrD,CAAC,CAAC;AAHW,QAAA,MAAM,UAGjB;AAEK,MAAM,OAAO,GAAG,CAAC,MAAkB,EAAW,EAAE;IAErD,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;QAAE,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAE/C,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;QAAE,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC;IAE7E,IAAI,OAAO,GAAG,KAAK,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE;QACpC,IAAI,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE;YACtB,OAAO,GAAG,IAAI,CAAC;YACf,MAAM;SACP;KACF;IAED,MAAM,OAAO,GAAY;QACvB,SAAS;QACT,OAAO;QACP,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,UAAU,EAAE,MAAM;QAClB,QAAQ,EAAE,MAAM,CAAC,MAAM;KACxB,CAAC;IAEF,MAAM,OAAO,GAAG,WAAW;SACxB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACX,OAAO,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,KAAK,
EAAE,EAAE;QAChB,OAAO,CAAC,CAAC,KAAK,CAAC;IACjB,CAAC,CAAC;SACD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACb,OAAO,CAAE,CAAC,UAAU,GAAG,CAAE,CAAC,UAAU,CAAC;IACvC,CAAC,CAAC,CAAC;IAEL,OAAO,OAAkB,CAAC;AAC5B,CAAC,CAAA;AApCY,QAAA,OAAO,WAoCnB;AAEM,MAAM,UAAU,GAAG,CAAC,QAAgB,EAAE,OAAgB,EAAE,EAAyB,EAAE,CACxF,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;IAC9B,IAAI,EAAO,CAAC;IACZ,MAAM,EAAE,GAAG,IAAA,cAAM,GAAE,CAAC;IAEpB,MAAM,OAAO,GAAG,CAAC,GAA6B,EAAE,MAAc,EAAE,EAAE;QAChE,IAAI,EAAE,EAAE;YACN,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;SAClB;QAED,IAAI,GAAG,EAAE;YACP,MAAM,CAAC,GAAG,CAAC,CAAC;SACb;aAAM;YACL,OAAO,CAAC,IAAA,cAAM,EAAC,MAAM,CAAC,CAAC,CAAC;SACzB;IACH,CAAC,CAAC;IAEF,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,EAAE;QAC3B,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAChC,MAAM,MAAM,GAAW,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAE3D,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAW,EAAE,EAAE;YACrE,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QACvB,CAAC,CAAC,CAAC;QACH,OAAO;KACR;IAED,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;AACjC,CAAC,CAAC,CAAC;AA5BQ,QAAA,UAAU,cA4BlB;AAEE,MAAM,cAAc,GAAG,CAAC,QAAgB,EAAE,OAAgB,EAAE,EAAgB,EAAE;IACnF,MAAM,EAAE,GAAG,IAAA,cAAM,GAAE,CAAC;IAEpB,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,EAAE;QAC3B,MAAM,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEnD,EAAE,CAAC,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3D,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QACjB,OAAO,IAAA,cAAM,EAAC,MAAM,CAAC,CAAC;KACvB;IAED,OAAO,IAAA,cAAM,EAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC;AAC3C,CAAC,CAAC;AAbW,QAAA,cAAc,kBAazB;AAEF,kBAAe;IACb,OAAO,EAAP,eAAO;IACP,MAAM,EAAN,cAAM;IACN,cAAc,EAAd,sBAAc;IACd,UAAU,EAAV,kBAAU;CACX,CAAC"}

8
lib/match.d.ts vendored Normal file
View File

@ -0,0 +1,8 @@
import { Context, Recogniser } from "./encoding";
/** One recogniser's verdict for an input, as built by this module's default export. */
export interface Match {
/** Score supplied by the recogniser and passed through unchanged. */
confidence: number;
/** Value returned by the recogniser's `name(ctx)`. */
name: string;
/** Value returned by the recogniser's `language()`, or `undefined` when it has no such method. */
lang?: string;
}
/**
 * Builds a `Match` for recogniser `rec` with the given `confidence`.
 * `ctx` is forwarded to `rec.name` to obtain the reported name.
 */
declare const _default: (ctx: Context, rec: Recogniser, confidence: number) => Match;
export default _default;

8
lib/match.js Normal file
View File

@ -0,0 +1,8 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.default = (ctx, rec, confidence) => ({
confidence,
name: rec.name(ctx),
lang: rec.language ? rec.language() : undefined,
});
//# sourceMappingURL=match.js.map

1
lib/match.js.map Normal file
View File

@ -0,0 +1 @@
{"version":3,"file":"match.js","sourceRoot":"","sources":["../src/match.ts"],"names":[],"mappings":";;AAQA,kBAAe,CAAC,GAAY,EAAE,GAAe,EAAE,UAAkB,EAAS,EAAE,CAAC,CAAC;IAC5E,UAAU;IACV,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;IACnB,IAAI,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,SAAS;CAChD,CAAC,CAAC"}