chardet/src/encoding/sbcs.ts

910 lines
40 KiB
TypeScript

import { Context, Recogniser } from '../encoding/index';
import match, { Match } from '../match';
/**
* This class recognizes single-byte encodings. Because the encoding scheme is so
* simple, language statistics are used to do the matching.
*/
/** Keeps the low 24 bits of the rolling n-gram, i.e. its three most recent mapped bytes. */
const N_GRAM_MASK = 0xffffff;

/**
 * Scores a byte buffer against a table of known 3-byte n-grams.
 *
 * Every input byte is first translated through `byteMap`; the translated bytes
 * are packed into a rolling 24-bit value and each value is looked up in the
 * sorted `ngramList`. The ratio of hits to lookups becomes the confidence.
 */
class NGramParser {
  byteIndex: number = 0;
  ngram: number = 0;
  ngramCount: number = 0;
  hitCount: number = 0;
  ngramList: number[];
  byteMap: number[];
  // TODO: is it safe to set it like this?
  spaceChar: number = 0x20;

  /**
   * @param theNgramList n-gram values sorted in ascending order (binary-searched).
   * @param theByteMap lookup table indexed by raw byte value; an entry of 0
   *   makes the parser drop that byte.
   */
  constructor(theNgramList: number[], theByteMap: number[]) {
    this.ngramList = theNgramList;
    this.byteMap = theByteMap;
  }

  /**
   * Binary search for `value` in an ascending `table` of any length.
   *
   * @returns the index of `value` (the last one if it is repeated), or -1
   *   when it is not present.
   */
  search(table: number[], value: number): number {
    // Find the first position whose entry is greater than `value`.
    let lo = 0;
    let hi = table.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (table[mid] <= value) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    // The candidate match, if any, sits immediately before that position.
    const index = lo - 1;
    if (index < 0 || table[index] !== value) return -1;
    return index;
  }

  /** Counts one lookup and, when `thisNgram` is in the table, one hit. */
  lookup(thisNgram: number): void {
    this.ngramCount += 1;
    if (this.search(this.ngramList, thisNgram) >= 0) {
      this.hitCount += 1;
    }
  }

  /**
   * Shifts `b` (low 8 bits only) into the rolling n-gram and looks the result
   * up. A lookup happens for every byte, including the first two of a buffer.
   */
  addByte(b: number): void {
    this.ngram = ((this.ngram << 8) + (b & 0xff)) & N_GRAM_MASK;
    this.lookup(this.ngram);
  }

  /** Returns the next input byte (0-255) and advances, or -1 at end of input. */
  nextByte(det: Context): number {
    if (this.byteIndex >= det.inputLen) return -1;
    return det.inputBytes[this.byteIndex++] & 0xff;
  }

  /**
   * Runs the whole input through the n-gram matcher.
   *
   * @param det detection context supplying `inputBytes` and `inputLen`.
   * @param spaceCh mapped value that represents a space; runs of it collapse
   *   into a single occurrence.
   * @returns a confidence: 98 when more than 33% of lookups hit, otherwise
   *   `floor(hitRatio * 300)`.
   */
  parse(det: Context, spaceCh: number): number {
    // Reset all per-run state so that an instance can be parsed more than once
    // without the previous run leaking into the result.
    this.byteIndex = 0;
    this.ngram = 0;
    this.ngramCount = 0;
    this.hitCount = 0;
    this.spaceChar = spaceCh;

    let ignoreSpace = false;
    for (let b = this.nextByte(det); b >= 0; b = this.nextByte(det)) {
      const mb = this.byteMap[b];
      // TODO: 0x20 might not be a space in all character sets...
      if (mb !== 0) {
        // Skip a space that directly follows another space.
        if (!(mb === this.spaceChar && ignoreSpace)) {
          this.addByte(mb);
        }
        ignoreSpace = mb === this.spaceChar;
      }
    }

    // TODO: Is this OK? The buffer could have ended in the middle of a word...
    this.addByte(this.spaceChar);

    // ngramCount is at least 1 here because of the trailing addByte above.
    const rawPercent = this.hitCount / this.ngramCount;
    // TODO - This is a bit of a hack to take care of a case
    // where we were getting a confidence of 135...
    if (rawPercent > 0.33) return 98;
    return Math.floor(rawPercent * 300.0);
  }
}
/**
 * Pairs a language code with the n-gram table that characterises it.
 * `fLang` holds the language code and `fNGrams` the n-gram values.
 */
class NGramsPlusLang {
  constructor(
    public fLang: string,
    public fNGrams: number[]
  ) {}
}
/**
 * Type guard: tells a plain list of n-gram numbers apart from a list of
 * per-language `NGramsPlusLang` entries by inspecting the first element.
 * An empty array is reported as not flat.
 */
function isFlatNgrams(val: NGramsPlusLang[] | number[]): val is number[] {
  if (!Array.isArray(val)) {
    return false;
  }
  const first = val[0];
  return isFinite(first as number);
}
/**
 * Base recogniser for single-byte character sets.
 *
 * Subclasses supply a byte-folding table (`byteMap`) and either one flat
 * n-gram table or several language-tagged tables (`ngrams`). `match` scores
 * the input against each table and reports the best confidence.
 */
class sbcs implements Recogniser {
  spaceChar = 0x20;
  /** Language of the best-scoring table from the most recent `match` call. */
  private nGramLang?: string = undefined;

  /** N-gram data for this charset; the base class has none. */
  ngrams(): NGramsPlusLang[] | number[] {
    return [];
  }

  /** Byte-folding table for this charset; the base class has none. */
  byteMap(): number[] {
    return [];
  }

  /** Charset name reported for a match. */
  name(_input: Context): string {
    return 'sbcs';
  }

  /**
   * Language detected by the last `match` call, or `undefined` when the
   * n-gram data is not language-tagged (subclasses may hard-code a value).
   */
  language(): string | undefined {
    return this.nGramLang;
  }

  /**
   * Scores `det` against this charset's n-gram data.
   *
   * @returns a match built from the best confidence, or `null` when no table
   *   scores above zero.
   */
  match(det: Context): Match | null {
    // This feels a bit dirty. Simpler alternative would be
    // splitting classes ISO_8859_1 etc into language-specific ones
    // with hardcoded languages like ISO_8859_9.
    this.nGramLang = undefined;

    const ngrams = this.ngrams();
    // Build the folding table once; it is the same for every language table.
    const byteMap = this.byteMap();

    if (isFlatNgrams(ngrams)) {
      const parser = new NGramParser(ngrams, byteMap);
      const confidence = parser.parse(det, this.spaceChar);
      return confidence <= 0 ? null : match(det, this, confidence);
    }

    let bestConfidence = -1;
    // Walk from the last table to the first; on a tie the later table wins
    // because only a strictly greater confidence replaces the current best.
    for (let i = ngrams.length - 1; i >= 0; i--) {
      const ngl = ngrams[i];
      // A fresh parser per table keeps the hit counters independent.
      const parser = new NGramParser(ngl.fNGrams, byteMap);
      const confidence = parser.parse(det, this.spaceChar);
      if (confidence > bestConfidence) {
        bestConfidence = confidence;
        this.nGramLang = ngl.fLang;
      }
    }
    return bestConfidence <= 0 ? null : match(det, this, bestConfidence);
  }
}
/**
* Recogniser for ISO-8859-1 / windows-1252 text. Carries one n-gram table per
* language (da, de, en, es, fr, it, nl, no, pt, sv); the language reported
* after a match is the one whose table scored best.
*/
export class ISO_8859_1 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xaa, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xb5, 0x20, 0x20, 0x20, 0x20, 0xba, 0x20, 0x20, 0x20, 0x20, 0x20,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff,
];
}
/**
* Language-tagged n-gram tables. Each value packs three folded bytes into
* 24 bits; the lists must stay in ascending order because the parser
* binary-searches them.
*/
ngrams() {
return [
new NGramsPlusLang(
'da',
[
0x206166, 0x206174, 0x206465, 0x20656e, 0x206572, 0x20666f, 0x206861,
0x206920, 0x206d65, 0x206f67, 0x2070e5, 0x207369, 0x207374, 0x207469,
0x207669, 0x616620, 0x616e20, 0x616e64, 0x617220, 0x617420, 0x646520,
0x64656e, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656e20,
0x656e64, 0x657220, 0x657265, 0x657320, 0x657420, 0x666f72, 0x676520,
0x67656e, 0x676572, 0x696765, 0x696c20, 0x696e67, 0x6b6520, 0x6b6b65,
0x6c6572, 0x6c6967, 0x6c6c65, 0x6d6564, 0x6e6465, 0x6e6520, 0x6e6720,
0x6e6765, 0x6f6720, 0x6f6d20, 0x6f7220, 0x70e520, 0x722064, 0x722065,
0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696c,
0x766572,
]
),
new NGramsPlusLang(
'de',
[
0x20616e, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569,
0x206765, 0x206861, 0x20696e, 0x206d69, 0x207363, 0x207365, 0x20756e,
0x207665, 0x20766f, 0x207765, 0x207a75, 0x626572, 0x636820, 0x636865,
0x636874, 0x646173, 0x64656e, 0x646572, 0x646965, 0x652064, 0x652073,
0x65696e, 0x656974, 0x656e20, 0x657220, 0x657320, 0x67656e, 0x68656e,
0x687420, 0x696368, 0x696520, 0x696e20, 0x696e65, 0x697420, 0x6c6963,
0x6c6c65, 0x6e2061, 0x6e2064, 0x6e2073, 0x6e6420, 0x6e6465, 0x6e6520,
0x6e6720, 0x6e6765, 0x6e7465, 0x722064, 0x726465, 0x726569, 0x736368,
0x737465, 0x742064, 0x746520, 0x74656e, 0x746572, 0x756e64, 0x756e67,
0x766572,
]
),
new NGramsPlusLang(
'en',
[
0x206120, 0x20616e, 0x206265, 0x20636f, 0x20666f, 0x206861, 0x206865,
0x20696e, 0x206d61, 0x206f66, 0x207072, 0x207265, 0x207361, 0x207374,
0x207468, 0x20746f, 0x207768, 0x616964, 0x616c20, 0x616e20, 0x616e64,
0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061,
0x652073, 0x652074, 0x656420, 0x656e74, 0x657220, 0x657320, 0x666f72,
0x686174, 0x686520, 0x686572, 0x696420, 0x696e20, 0x696e67, 0x696f6e,
0x697320, 0x6e2061, 0x6e2074, 0x6e6420, 0x6e6720, 0x6e7420, 0x6f6620,
0x6f6e20, 0x6f7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169,
0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696f, 0x746f20,
0x747320,
]
),
new NGramsPlusLang(
'es',
[
0x206120, 0x206361, 0x20636f, 0x206465, 0x20656c, 0x20656e, 0x206573,
0x20696e, 0x206c61, 0x206c6f, 0x207061, 0x20706f, 0x207072, 0x207175,
0x207265, 0x207365, 0x20756e, 0x207920, 0x612063, 0x612064, 0x612065,
0x61206c, 0x612070, 0x616369, 0x61646f, 0x616c20, 0x617220, 0x617320,
0x6369f3, 0x636f6e, 0x646520, 0x64656c, 0x646f20, 0x652064, 0x652065,
0x65206c, 0x656c20, 0x656e20, 0x656e74, 0x657320, 0x657374, 0x69656e,
0x69f36e, 0x6c6120, 0x6c6f73, 0x6e2065, 0x6e7465, 0x6f2064, 0x6f2065,
0x6f6e20, 0x6f7220, 0x6f7320, 0x706172, 0x717565, 0x726120, 0x726573,
0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746f20, 0x756520,
0xf36e20,
]
),
new NGramsPlusLang(
'fr',
[
0x206175, 0x20636f, 0x206461, 0x206465, 0x206475, 0x20656e, 0x206574,
0x206c61, 0x206c65, 0x207061, 0x20706f, 0x207072, 0x207175, 0x207365,
0x20736f, 0x20756e, 0x20e020, 0x616e74, 0x617469, 0x636520, 0x636f6e,
0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065,
0x65206c, 0x652070, 0x652073, 0x656e20, 0x656e74, 0x657220, 0x657320,
0x657420, 0x657572, 0x696f6e, 0x697320, 0x697420, 0x6c6120, 0x6c6520,
0x6c6573, 0x6d656e, 0x6e2064, 0x6e6520, 0x6e7320, 0x6e7420, 0x6f6e20,
0x6f6e74, 0x6f7572, 0x717565, 0x72206c, 0x726520, 0x732061, 0x732064,
0x732065, 0x73206c, 0x732070, 0x742064, 0x746520, 0x74696f, 0x756520,
0x757220,
]
),
new NGramsPlusLang(
'it',
[
0x20616c, 0x206368, 0x20636f, 0x206465, 0x206469, 0x206520, 0x20696c,
0x20696e, 0x206c61, 0x207065, 0x207072, 0x20756e, 0x612063, 0x612064,
0x612070, 0x612073, 0x61746f, 0x636865, 0x636f6e, 0x64656c, 0x646920,
0x652061, 0x652063, 0x652064, 0x652069, 0x65206c, 0x652070, 0x652073,
0x656c20, 0x656c6c, 0x656e74, 0x657220, 0x686520, 0x692061, 0x692063,
0x692064, 0x692073, 0x696120, 0x696c20, 0x696e20, 0x696f6e, 0x6c6120,
0x6c6520, 0x6c6920, 0x6c6c61, 0x6e6520, 0x6e6920, 0x6e6f20, 0x6e7465,
0x6f2061, 0x6f2064, 0x6f2069, 0x6f2073, 0x6f6e20, 0x6f6e65, 0x706572,
0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746f20,
0x7a696f,
]
),
new NGramsPlusLang(
'nl',
[
0x20616c, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656e,
0x206765, 0x206865, 0x20696e, 0x206d61, 0x206d65, 0x206f70, 0x207465,
0x207661, 0x207665, 0x20766f, 0x207765, 0x207a69, 0x61616e, 0x616172,
0x616e20, 0x616e64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656e,
0x646572, 0x652062, 0x652076, 0x65656e, 0x656572, 0x656e20, 0x657220,
0x657273, 0x657420, 0x67656e, 0x686574, 0x696520, 0x696e20, 0x696e67,
0x697320, 0x6e2062, 0x6e2064, 0x6e2065, 0x6e2068, 0x6e206f, 0x6e2076,
0x6e6465, 0x6e6720, 0x6f6e64, 0x6f6f72, 0x6f7020, 0x6f7220, 0x736368,
0x737465, 0x742064, 0x746520, 0x74656e, 0x746572, 0x76616e, 0x766572,
0x766f6f,
]
),
new NGramsPlusLang(
'no',
[
0x206174, 0x206176, 0x206465, 0x20656e, 0x206572, 0x20666f, 0x206861,
0x206920, 0x206d65, 0x206f67, 0x2070e5, 0x207365, 0x20736b, 0x20736f,
0x207374, 0x207469, 0x207669, 0x20e520, 0x616e64, 0x617220, 0x617420,
0x646520, 0x64656e, 0x646574, 0x652073, 0x656420, 0x656e20, 0x656e65,
0x657220, 0x657265, 0x657420, 0x657474, 0x666f72, 0x67656e, 0x696b6b,
0x696c20, 0x696e67, 0x6b6520, 0x6b6b65, 0x6c6520, 0x6c6c65, 0x6d6564,
0x6d656e, 0x6e2073, 0x6e6520, 0x6e6720, 0x6e6765, 0x6e6e65, 0x6f6720,
0x6f6d20, 0x6f7220, 0x70e520, 0x722073, 0x726520, 0x736f6d, 0x737465,
0x742073, 0x746520, 0x74656e, 0x746572, 0x74696c, 0x747420, 0x747465,
0x766572,
]
),
new NGramsPlusLang(
'pt',
[
0x206120, 0x20636f, 0x206461, 0x206465, 0x20646f, 0x206520, 0x206573,
0x206d61, 0x206e6f, 0x206f20, 0x207061, 0x20706f, 0x207072, 0x207175,
0x207265, 0x207365, 0x20756d, 0x612061, 0x612063, 0x612064, 0x612070,
0x616465, 0x61646f, 0x616c20, 0x617220, 0x617261, 0x617320, 0x636f6d,
0x636f6e, 0x646120, 0x646520, 0x646f20, 0x646f73, 0x652061, 0x652064,
0x656d20, 0x656e74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6d656e,
0x6e7465, 0x6e746f, 0x6f2061, 0x6f2063, 0x6f2064, 0x6f2065, 0x6f2070,
0x6f7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064,
0x732065, 0x732070, 0x737461, 0x746520, 0x746f20, 0x756520, 0xe36f20,
0xe7e36f,
]
),
new NGramsPlusLang(
'sv',
[
0x206174, 0x206176, 0x206465, 0x20656e, 0x2066f6, 0x206861, 0x206920,
0x20696e, 0x206b6f, 0x206d65, 0x206f63, 0x2070e5, 0x20736b, 0x20736f,
0x207374, 0x207469, 0x207661, 0x207669, 0x20e472, 0x616465, 0x616e20,
0x616e64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656e, 0x646572,
0x646574, 0x656420, 0x656e20, 0x657220, 0x657420, 0x66f672, 0x67656e,
0x696c6c, 0x696e67, 0x6b6120, 0x6c6c20, 0x6d6564, 0x6e2073, 0x6e6120,
0x6e6465, 0x6e6720, 0x6e6765, 0x6e696e, 0x6f6368, 0x6f6d20, 0x6f6e20,
0x70e520, 0x722061, 0x722073, 0x726120, 0x736b61, 0x736f6d, 0x742073,
0x746120, 0x746520, 0x746572, 0x74696c, 0x747420, 0x766172, 0xe47220,
0xf67220,
]
),
];
}
/**
* Reports 'windows-1252' when the context's `c1Bytes` flag is truthy and
* 'ISO-8859-1' otherwise.
* NOTE(review): `c1Bytes` presumably marks input containing bytes in the
* 0x80-0x9F range; confirm against Context.
*/
name(input: Context): string {
return input && input.c1Bytes ? 'windows-1252' : 'ISO-8859-1';
}
}
/**
* Recogniser for ISO-8859-2 / windows-1250 text. Carries one n-gram table per
* language (cs, hu, pl, ro); the language reported after a match is the one
* whose table scored best.
*/
export class ISO_8859_2 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xb1, 0x20, 0xb3, 0x20, 0xb5, 0xb6, 0x20,
0x20, 0xb9, 0xba, 0xbb, 0xbc, 0x20, 0xbe, 0xbf, 0x20, 0xb1, 0x20, 0xb3,
0x20, 0xb5, 0xb6, 0xb7, 0x20, 0xb9, 0xba, 0xbb, 0xbc, 0x20, 0xbe, 0xbf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0x20,
];
}
/**
* Language-tagged n-gram tables. Each value packs three folded bytes into
* 24 bits; the lists must stay in ascending order because the parser
* binary-searches them.
*/
ngrams() {
return [
new NGramsPlusLang(
'cs',
[
0x206120, 0x206279, 0x20646f, 0x206a65, 0x206e61, 0x206e65, 0x206f20,
0x206f64, 0x20706f, 0x207072, 0x2070f8, 0x20726f, 0x207365, 0x20736f,
0x207374, 0x20746f, 0x207620, 0x207679, 0x207a61, 0x612070, 0x636520,
0x636820, 0x652070, 0x652073, 0x652076, 0x656d20, 0x656eed, 0x686f20,
0x686f64, 0x697374, 0x6a6520, 0x6b7465, 0x6c6520, 0x6c6920, 0x6e6120,
0x6ee920, 0x6eec20, 0x6eed20, 0x6f2070, 0x6f646e, 0x6f6a69, 0x6f7374,
0x6f7520, 0x6f7661, 0x706f64, 0x706f6a, 0x70726f, 0x70f865, 0x736520,
0x736f75, 0x737461, 0x737469, 0x73746e, 0x746572, 0x746eed, 0x746f20,
0x752070, 0xbe6520, 0xe16eed, 0xe9686f, 0xed2070, 0xed2073, 0xed6d20,
0xf86564,
]
),
new NGramsPlusLang(
'hu',
[
0x206120, 0x20617a, 0x206265, 0x206567, 0x20656c, 0x206665, 0x206861,
0x20686f, 0x206973, 0x206b65, 0x206b69, 0x206bf6, 0x206c65, 0x206d61,
0x206d65, 0x206d69, 0x206e65, 0x20737a, 0x207465, 0x20e973, 0x612061,
0x61206b, 0x61206d, 0x612073, 0x616b20, 0x616e20, 0x617a20, 0x62616e,
0x62656e, 0x656779, 0x656b20, 0x656c20, 0x656c65, 0x656d20, 0x656e20,
0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686f67, 0x696e74,
0x697320, 0x6b2061, 0x6bf67a, 0x6d6567, 0x6d696e, 0x6e2061, 0x6e616b,
0x6e656b, 0x6e656d, 0x6e7420, 0x6f6779, 0x732061, 0x737a65, 0x737a74,
0x737ae1, 0x73e967, 0x742061, 0x747420, 0x74e173, 0x7a6572, 0xe16e20,
0xe97320,
]
),
new NGramsPlusLang(
'pl',
[
0x20637a, 0x20646f, 0x206920, 0x206a65, 0x206b6f, 0x206d61, 0x206d69,
0x206e61, 0x206e69, 0x206f64, 0x20706f, 0x207072, 0x207369, 0x207720,
0x207769, 0x207779, 0x207a20, 0x207a61, 0x612070, 0x612077, 0x616e69,
0x636820, 0x637a65, 0x637a79, 0x646f20, 0x647a69, 0x652070, 0x652073,
0x652077, 0x65207a, 0x65676f, 0x656a20, 0x656d20, 0x656e69, 0x676f20,
0x696120, 0x696520, 0x69656a, 0x6b6120, 0x6b6920, 0x6b6965, 0x6d6965,
0x6e6120, 0x6e6961, 0x6e6965, 0x6f2070, 0x6f7761, 0x6f7769, 0x706f6c,
0x707261, 0x70726f, 0x70727a, 0x727a65, 0x727a79, 0x7369ea, 0x736b69,
0x737461, 0x776965, 0x796368, 0x796d20, 0x7a6520, 0x7a6965, 0x7a7920,
0xf37720,
]
),
new NGramsPlusLang(
'ro',
[
0x206120, 0x206163, 0x206361, 0x206365, 0x20636f, 0x206375, 0x206465,
0x206469, 0x206c61, 0x206d61, 0x207065, 0x207072, 0x207365, 0x2073e3,
0x20756e, 0x20ba69, 0x20ee6e, 0x612063, 0x612064, 0x617265, 0x617420,
0x617465, 0x617520, 0x636172, 0x636f6e, 0x637520, 0x63e320, 0x646520,
0x652061, 0x652063, 0x652064, 0x652070, 0x652073, 0x656120, 0x656920,
0x656c65, 0x656e74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070,
0x696520, 0x696920, 0x696e20, 0x6c6120, 0x6c6520, 0x6c6f72, 0x6c7569,
0x6e6520, 0x6e7472, 0x6f7220, 0x70656e, 0x726520, 0x726561, 0x727520,
0x73e320, 0x746520, 0x747275, 0x74e320, 0x756920, 0x756c20, 0xba6920,
0xee6e20,
]
),
];
}
/**
* Reports 'windows-1250' when the context's `c1Bytes` flag is truthy and
* 'ISO-8859-2' otherwise.
*/
name(det: Context): string {
return det && det.c1Bytes ? 'windows-1250' : 'ISO-8859-2';
}
}
/**
* Recogniser for ISO-8859-5 text. Uses a single flat n-gram table and always
* reports the language as 'ru'.
*/
export class ISO_8859_5 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x20, 0xfe, 0xff, 0xd0, 0xd1, 0xd2, 0xd3,
0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0x20, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0x20, 0xfe, 0xff,
];
}
/**
* Flat n-gram table. Each value packs three folded bytes into 24 bits; the
* list must stay in ascending order because the parser binary-searches it.
*/
ngrams() {
return [
0x20d220, 0x20d2de, 0x20d4de, 0x20d7d0, 0x20d820, 0x20dad0, 0x20dade,
0x20ddd0, 0x20ddd5, 0x20ded1, 0x20dfde, 0x20dfe0, 0x20e0d0, 0x20e1de,
0x20e1e2, 0x20e2de, 0x20e7e2, 0x20ede2, 0xd0ddd8, 0xd0e2ec, 0xd3de20,
0xd5dbec, 0xd5ddd8, 0xd5e1e2, 0xd5e220, 0xd820df, 0xd8d520, 0xd8d820,
0xd8ef20, 0xdbd5dd, 0xdbd820, 0xdbecdd, 0xddd020, 0xddd520, 0xddd8d5,
0xddd8ef, 0xddde20, 0xddded2, 0xde20d2, 0xde20df, 0xde20e1, 0xded220,
0xded2d0, 0xded3de, 0xded920, 0xdedbec, 0xdedc20, 0xdee1e2, 0xdfdedb,
0xdfe0d5, 0xdfe0d8, 0xdfe0de, 0xe0d0d2, 0xe0d5d4, 0xe1e2d0, 0xe1e2d2,
0xe1e2d8, 0xe1ef20, 0xe2d5db, 0xe2de20, 0xe2dee0, 0xe2ec20, 0xe7e2de,
0xebe520,
];
}
/** Charset name reported for a match; fixed for this recogniser. */
name() {
return 'ISO-8859-5';
}
/** Language is hard-coded because the n-gram table is not language-tagged. */
language() {
return 'ru';
}
}
/**
* Recogniser for ISO-8859-6 text. Uses a single flat n-gram table and always
* reports the language as 'ar'.
*/
export class ISO_8859_6 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0x20, 0x20, 0x20, 0x20, 0x20, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20,
];
}
/**
* Flat n-gram table. Each value packs three folded bytes into 24 bits; the
* list must stay in ascending order because the parser binary-searches it.
*/
ngrams() {
return [
0x20c7e4, 0x20c7e6, 0x20c8c7, 0x20d9e4, 0x20e1ea, 0x20e4e4, 0x20e5e6,
0x20e8c7, 0xc720c7, 0xc7c120, 0xc7ca20, 0xc7d120, 0xc7e420, 0xc7e4c3,
0xc7e4c7, 0xc7e4c8, 0xc7e4ca, 0xc7e4cc, 0xc7e4cd, 0xc7e4cf, 0xc7e4d3,
0xc7e4d9, 0xc7e4e2, 0xc7e4e5, 0xc7e4e8, 0xc7e4ea, 0xc7e520, 0xc7e620,
0xc7e6ca, 0xc820c7, 0xc920c7, 0xc920e1, 0xc920e4, 0xc920e5, 0xc920e8,
0xca20c7, 0xcf20c7, 0xcfc920, 0xd120c7, 0xd1c920, 0xd320c7, 0xd920c7,
0xd9e4e9, 0xe1ea20, 0xe420c7, 0xe4c920, 0xe4e920, 0xe4ea20, 0xe520c7,
0xe5c720, 0xe5c920, 0xe5e620, 0xe620c7, 0xe720c7, 0xe7c720, 0xe8c7e4,
0xe8e620, 0xe920c7, 0xea20c7, 0xea20e5, 0xea20e8, 0xeac920, 0xead120,
0xeae620,
];
}
/** Charset name reported for a match; fixed for this recogniser. */
name() {
return 'ISO-8859-6';
}
/** Language is hard-coded because the n-gram table is not language-tagged. */
language() {
return 'ar';
}
}
/**
* Recogniser for ISO-8859-7 / windows-1253 text. Uses a single flat n-gram
* table and always reports the language as 'el'.
*/
export class ISO_8859_7 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xa1, 0xa2, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xdc, 0x20, 0xdd, 0xde, 0xdf, 0x20, 0xfc, 0x20, 0xfd, 0xfe,
0xc0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0x20, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0x20,
];
}
/**
* Flat n-gram table. Each value packs three folded bytes into 24 bits; the
* list must stay in ascending order because the parser binary-searches it.
*/
ngrams() {
return [
0x20e1ed, 0x20e1f0, 0x20e3e9, 0x20e4e9, 0x20e5f0, 0x20e720, 0x20eae1,
0x20ece5, 0x20ede1, 0x20ef20, 0x20f0e1, 0x20f0ef, 0x20f0f1, 0x20f3f4,
0x20f3f5, 0x20f4e7, 0x20f4ef, 0xdfe120, 0xe120e1, 0xe120f4, 0xe1e920,
0xe1ed20, 0xe1f0fc, 0xe1f220, 0xe3e9e1, 0xe5e920, 0xe5f220, 0xe720f4,
0xe7ed20, 0xe7f220, 0xe920f4, 0xe9e120, 0xe9eade, 0xe9f220, 0xeae1e9,
0xeae1f4, 0xece520, 0xed20e1, 0xed20e5, 0xed20f0, 0xede120, 0xeff220,
0xeff520, 0xf0eff5, 0xf0f1ef, 0xf0fc20, 0xf220e1, 0xf220e5, 0xf220ea,
0xf220f0, 0xf220f4, 0xf3e520, 0xf3e720, 0xf3f4ef, 0xf4e120, 0xf4e1e9,
0xf4e7ed, 0xf4e7f2, 0xf4e9ea, 0xf4ef20, 0xf4eff5, 0xf4f9ed, 0xf9ed20,
0xfeed20,
];
}
/**
* Reports 'windows-1253' when the context's `c1Bytes` flag is truthy and
* 'ISO-8859-7' otherwise.
*/
name(det: Context): string {
return det && det.c1Bytes ? 'windows-1253' : 'ISO-8859-7';
}
/** Language is hard-coded because the n-gram table is not language-tagged. */
language() {
return 'el';
}
}
/**
* Recogniser for ISO-8859-8 / windows-1255 text. Carries two n-gram tables,
* both tagged 'he', and always reports the language as 'he'.
*/
export class ISO_8859_8 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xb5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0x20,
0x20, 0x20, 0x20, 0x20,
];
}
/**
* Two n-gram tables, both tagged 'he'; the higher-scoring one determines the
* confidence. Each value packs three folded bytes into 24 bits and the lists
* must stay in ascending order because the parser binary-searches them.
* NOTE(review): the two tables presumably cover two different byte orderings
* of the same language; confirm against the upstream data source.
*/
ngrams() {
return [
new NGramsPlusLang(
'he',
[
0x20e0e5, 0x20e0e7, 0x20e0e9, 0x20e0fa, 0x20e1e9, 0x20e1ee, 0x20e4e0,
0x20e4e5, 0x20e4e9, 0x20e4ee, 0x20e4f2, 0x20e4f9, 0x20e4fa, 0x20ece0,
0x20ece4, 0x20eee0, 0x20f2ec, 0x20f9ec, 0xe0fa20, 0xe420e0, 0xe420e1,
0xe420e4, 0xe420ec, 0xe420ee, 0xe420f9, 0xe4e5e0, 0xe5e020, 0xe5ed20,
0xe5ef20, 0xe5f820, 0xe5fa20, 0xe920e4, 0xe9e420, 0xe9e5fa, 0xe9e9ed,
0xe9ed20, 0xe9ef20, 0xe9f820, 0xe9fa20, 0xec20e0, 0xec20e4, 0xece020,
0xece420, 0xed20e0, 0xed20e1, 0xed20e4, 0xed20ec, 0xed20ee, 0xed20f9,
0xeee420, 0xef20e4, 0xf0e420, 0xf0e920, 0xf0e9ed, 0xf2ec20, 0xf820e4,
0xf8e9ed, 0xf9ec20, 0xfa20e0, 0xfa20e1, 0xfa20e4, 0xfa20ec, 0xfa20ee,
0xfa20f9,
]
),
new NGramsPlusLang(
'he',
[
0x20e0e5, 0x20e0ec, 0x20e4e9, 0x20e4ec, 0x20e4ee, 0x20e4f0, 0x20e9f0,
0x20ecf2, 0x20ecf9, 0x20ede5, 0x20ede9, 0x20efe5, 0x20efe9, 0x20f8e5,
0x20f8e9, 0x20fae0, 0x20fae5, 0x20fae9, 0xe020e4, 0xe020ec, 0xe020ed,
0xe020fa, 0xe0e420, 0xe0e5e4, 0xe0ec20, 0xe0ee20, 0xe120e4, 0xe120ed,
0xe120fa, 0xe420e4, 0xe420e9, 0xe420ec, 0xe420ed, 0xe420ef, 0xe420f8,
0xe420fa, 0xe4ec20, 0xe5e020, 0xe5e420, 0xe7e020, 0xe9e020, 0xe9e120,
0xe9e420, 0xec20e4, 0xec20ed, 0xec20fa, 0xecf220, 0xecf920, 0xede9e9,
0xede9f0, 0xede9f8, 0xee20e4, 0xee20ed, 0xee20fa, 0xeee120, 0xeee420,
0xf2e420, 0xf920e4, 0xf920ed, 0xf920fa, 0xf9e420, 0xfae020, 0xfae420,
0xfae5e9,
]
),
];
}
/**
* Reports 'windows-1255' when the context's `c1Bytes` flag is truthy and
* 'ISO-8859-8' otherwise.
*/
name(det: Context): string {
return det && det.c1Bytes ? 'windows-1255' : 'ISO-8859-8';
}
/** Always 'he', regardless of which of the two tables scored best. */
language() {
return 'he';
}
}
/**
* Recogniser for ISO-8859-9 / windows-1254 text. Uses a single flat n-gram
* table and always reports the language as 'tr'.
*/
export class ISO_8859_9 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops. Byte 0xDD is folded
* to ASCII 0x69 rather than to a high-range value.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xaa, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xb5, 0x20, 0x20, 0x20, 0x20, 0xba, 0x20, 0x20, 0x20, 0x20, 0x20,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x69, 0xfe, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0x20, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff,
];
}
/**
* Flat n-gram table. Each value packs three folded bytes into 24 bits; the
* list must stay in ascending order because the parser binary-searches it.
*/
ngrams() {
return [
0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861,
0x20696c, 0x206b61, 0x206b6f, 0x206d61, 0x206f6c, 0x207361, 0x207461,
0x207665, 0x207961, 0x612062, 0x616b20, 0x616c61, 0x616d61, 0x616e20,
0x616efd, 0x617220, 0x617261, 0x6172fd, 0x6173fd, 0x617961, 0x626972,
0x646120, 0x646520, 0x646920, 0x652062, 0x65206b, 0x656469, 0x656e20,
0x657220, 0x657269, 0x657369, 0x696c65, 0x696e20, 0x696e69, 0x697220,
0x6c616e, 0x6c6172, 0x6c6520, 0x6c6572, 0x6e2061, 0x6e2062, 0x6e206b,
0x6e6461, 0x6e6465, 0x6e6520, 0x6e6920, 0x6e696e, 0x6efd20, 0x72696e,
0x72fd6e, 0x766520, 0x796120, 0x796f72, 0xfd6e20, 0xfd6e64, 0xfd6efd,
0xfdf0fd,
];
}
/**
* Reports 'windows-1254' when the context's `c1Bytes` flag is truthy and
* 'ISO-8859-9' otherwise.
*/
name(det: Context): string {
return det && det.c1Bytes ? 'windows-1254' : 'ISO-8859-9';
}
/** Language is hard-coded because the n-gram table is not language-tagged. */
language() {
return 'tr';
}
}
/**
* Recogniser for windows-1251 text. Uses a single flat n-gram table and
* always reports the language as 'ru'.
*/
export class windows_1251 extends sbcs {
/**
* 256-entry folding table indexed by raw byte value. ASCII letters of either
* case map to the lowercase letter, most other ASCII bytes map to 0x20, and
* byte 0x27 maps to 0x00, which the n-gram parser drops.
*/
byteMap() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x90, 0x83, 0x20, 0x83,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9a, 0x20, 0x9c, 0x9d, 0x9e, 0x9f,
0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9a, 0x20,
0x9c, 0x9d, 0x9e, 0x9f, 0x20, 0xa2, 0xa2, 0xbc, 0x20, 0xb4, 0x20, 0x20,
0xb8, 0x20, 0xba, 0x20, 0x20, 0x20, 0x20, 0xbf, 0x20, 0x20, 0xb3, 0xb3,
0xb4, 0xb5, 0x20, 0x20, 0xb8, 0x20, 0xba, 0x20, 0xbc, 0xbe, 0xbe, 0xbf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xe0, 0xe1, 0xe2, 0xe3,
0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff,
];
}
/**
* Flat n-gram table. Each value packs three folded bytes into 24 bits; the
* list must stay in ascending order because the parser binary-searches it.
*/
ngrams() {
return [
0x20e220, 0x20e2ee, 0x20e4ee, 0x20e7e0, 0x20e820, 0x20eae0, 0x20eaee,
0x20ede0, 0x20ede5, 0x20eee1, 0x20efee, 0x20eff0, 0x20f0e0, 0x20f1ee,
0x20f1f2, 0x20f2ee, 0x20f7f2, 0x20fdf2, 0xe0ede8, 0xe0f2fc, 0xe3ee20,
0xe5ebfc, 0xe5ede8, 0xe5f1f2, 0xe5f220, 0xe820ef, 0xe8e520, 0xe8e820,
0xe8ff20, 0xebe5ed, 0xebe820, 0xebfced, 0xede020, 0xede520, 0xede8e5,
0xede8ff, 0xedee20, 0xedeee2, 0xee20e2, 0xee20ef, 0xee20f1, 0xeee220,
0xeee2e0, 0xeee3ee, 0xeee920, 0xeeebfc, 0xeeec20, 0xeef1f2, 0xefeeeb,
0xeff0e5, 0xeff0e8, 0xeff0ee, 0xf0e0e2, 0xf0e5e4, 0xf1f2e0, 0xf1f2e2,
0xf1f2e8, 0xf1ff20, 0xf2e5eb, 0xf2ee20, 0xf2eef0, 0xf2fc20, 0xf7f2ee,
0xfbf520,
];
}
/** Charset name reported for a match; fixed for this recogniser. */
name() {
return 'windows-1251';
}
/** Language is hard-coded because the n-gram table is not language-tagged. */
language() {
return 'ru';
}
}
/**
 * Single-byte recogniser data for Arabic text encoded as windows-1256.
 *
 * The class only supplies static data (byte map, trigram table, labels);
 * the matching logic itself is inherited from `sbcs`.
 */
export class windows_1256 extends sbcs {
  /**
   * Byte normalisation table: 256 entries, one per possible input byte.
   *
   * ASCII letters are folded to lower case, the apostrophe (0x27) maps to
   * 0x00, and every other byte below 0x80 maps to 0x20. In the upper half a
   * subset of bytes is kept (or folded onto another byte, e.g. 0x8c -> 0x9c)
   * and the remainder maps to 0x20.
   * NOTE(review): presumably consumed by NGramParser, which skips 0x00 and
   * treats the space character as a word separator - confirm in sbcs.match.
   *
   * @returns a freshly allocated 256-element array.
   */
  byteMap() {
    // One row per high nibble; the trailing comment is the index of the first entry.
    const table = [
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x00
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x10
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x20
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x30
      0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x40
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x50
      0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x60
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x70
      0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 0x88, 0x20, 0x8a, 0x20, 0x9c, 0x8d, 0x8e, 0x8f, // 0x80
      0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x98, 0x20, 0x9a, 0x20, 0x9c, 0x20, 0x20, 0x9f, // 0x90
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xaa, 0x20, 0x20, 0x20, 0x20, 0x20, // 0xa0
      0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0xb0
      0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, // 0xc0
      0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0x20, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, // 0xd0
      0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, // 0xe0
      0x20, 0x20, 0x20, 0x20, 0xf4, 0x20, 0x20, 0x20, 0x20, 0xf9, 0x20, 0xfb, 0xfc, 0x20, 0x20, 0xff, // 0xf0
    ];
    return table;
  }

  /**
   * Trigram table for Arabic text in windows-1256.
   *
   * Each value packs three mapped bytes into one 24-bit integer; the list
   * holds exactly 64 entries in ascending order.
   *
   * @returns a freshly allocated array of the 64 packed trigrams.
   */
  ngrams() {
    // Eight per row: 8 rows x 8 values = 64 entries, ascending.
    const trigrams = [
      0x20c7e1, 0x20c7e4, 0x20c8c7, 0x20dae1, 0x20dded, 0x20e1e1, 0x20e3e4, 0x20e6c7,
      0xc720c7, 0xc7c120, 0xc7ca20, 0xc7d120, 0xc7e120, 0xc7e1c3, 0xc7e1c7, 0xc7e1c8,
      0xc7e1ca, 0xc7e1cc, 0xc7e1cd, 0xc7e1cf, 0xc7e1d3, 0xc7e1da, 0xc7e1de, 0xc7e1e3,
      0xc7e1e6, 0xc7e1ed, 0xc7e320, 0xc7e420, 0xc7e4ca, 0xc820c7, 0xc920c7, 0xc920dd,
      0xc920e1, 0xc920e3, 0xc920e6, 0xca20c7, 0xcf20c7, 0xcfc920, 0xd120c7, 0xd1c920,
      0xd320c7, 0xda20c7, 0xdae1ec, 0xdded20, 0xe120c7, 0xe1c920, 0xe1ec20, 0xe1ed20,
      0xe320c7, 0xe3c720, 0xe3c920, 0xe3e420, 0xe420c7, 0xe520c7, 0xe5c720, 0xe6c7e1,
      0xe6e420, 0xec20c7, 0xed20c7, 0xed20e3, 0xed20e6, 0xedc920, 0xedd120, 0xede420,
    ];
    return trigrams;
  }

  /**
   * Charset label for this recogniser.
   *
   * @returns the constant string 'windows-1256'.
   */
  name() {
    const charsetLabel = 'windows-1256';
    return charsetLabel;
  }

  /**
   * Language code associated with this recogniser's trigram statistics.
   *
   * @returns the constant string 'ar'.
   */
  language() {
    const languageCode = 'ar';
    return languageCode;
  }
}
/**
 * Single-byte recogniser data for Russian text encoded as KOI8-R.
 *
 * The class only supplies static data (byte map, trigram table, labels);
 * the matching logic itself is inherited from `sbcs`.
 */
export class KOI8_R extends sbcs {
  /**
   * Byte normalisation table: 256 entries, one per possible input byte.
   *
   * ASCII letters are folded to lower case, the apostrophe (0x27) maps to
   * 0x00, and every other byte below 0x80 maps to 0x20. In the upper half,
   * 0xc0-0xdf map to themselves, 0xe0-0xff are folded onto 0xc0-0xdf, 0xa3
   * and 0xb3 both map to 0xa3, and everything else maps to 0x20.
   * NOTE(review): presumably consumed by NGramParser, which skips 0x00 and
   * treats the space character as a word separator - confirm in sbcs.match.
   *
   * @returns a freshly allocated 256-element array.
   */
  byteMap() {
    // One row per high nibble; the trailing comment is the index of the first entry.
    const table = [
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x00
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x10
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x20
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x30
      0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x40
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x50
      0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 0x60
      0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x70
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x80
      0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0x90
      0x20, 0x20, 0x20, 0xa3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0xa0
      0x20, 0x20, 0x20, 0xa3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, // 0xb0
      0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, // 0xc0
      0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, // 0xd0
      0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, // 0xe0
      0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, // 0xf0
    ];
    return table;
  }

  /**
   * Trigram table for Russian text in KOI8-R.
   *
   * Each value packs three mapped bytes into one 24-bit integer; the list
   * holds exactly 64 entries in ascending order.
   *
   * @returns a freshly allocated array of the 64 packed trigrams.
   */
  ngrams() {
    // Eight per row: 8 rows x 8 values = 64 entries, ascending.
    const trigrams = [
      0x20c4cf, 0x20c920, 0x20cbc1, 0x20cbcf, 0x20cec1, 0x20cec5, 0x20cfc2, 0x20d0cf,
      0x20d0d2, 0x20d2c1, 0x20d3cf, 0x20d3d4, 0x20d4cf, 0x20d720, 0x20d7cf, 0x20dac1,
      0x20dcd4, 0x20ded4, 0xc1cec9, 0xc1d4d8, 0xc5ccd8, 0xc5cec9, 0xc5d3d4, 0xc5d420,
      0xc7cf20, 0xc920d0, 0xc9c520, 0xc9c920, 0xc9d120, 0xccc5ce, 0xccc920, 0xccd8ce,
      0xcec120, 0xcec520, 0xcec9c5, 0xcec9d1, 0xcecf20, 0xcecfd7, 0xcf20d0, 0xcf20d3,
      0xcf20d7, 0xcfc7cf, 0xcfca20, 0xcfccd8, 0xcfcd20, 0xcfd3d4, 0xcfd720, 0xcfd7c1,
      0xd0cfcc, 0xd0d2c5, 0xd0d2c9, 0xd0d2cf, 0xd2c1d7, 0xd2c5c4, 0xd3d120, 0xd3d4c1,
      0xd3d4c9, 0xd3d4d7, 0xd4c5cc, 0xd4cf20, 0xd4cfd2, 0xd4d820, 0xd9c820, 0xded4cf,
    ];
    return trigrams;
  }

  /**
   * Charset label for this recogniser.
   *
   * @returns the constant string 'KOI8-R'.
   */
  name() {
    const charsetLabel = 'KOI8-R';
    return charsetLabel;
  }

  /**
   * Language code associated with this recogniser's trigram statistics.
   *
   * @returns the constant string 'ru'.
   */
  language() {
    const languageCode = 'ru';
    return languageCode;
  }
}
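/*
 * NOTE(review): commented-out legacy stub (CommonJS / util.inherits style) for
 * an ISO-8859-7 / windows-1253 recogniser with language 'el'. Its byte map and
 * n-gram tables are empty, so it appears never to have been ported - confirm
 * before reviving or deleting it.
module.exports.ISO_8859_7 = function() {
this.byteMap = function() {
return [
];
};
this.ngrams = function() {
return [
];
};
this.name = function(det) {
if (typeof det == 'undefined')
return 'ISO-8859-7';
return det.c1Bytes ? 'windows-1253' : 'ISO-8859-7';
};
this.language = function() {
return 'el';
};
};
util.inherits(module.exports.ISO_8859_7, sbcs);
*/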