2020-05-07 23:06:28 +00:00
|
|
|
import { Match } from './match';
|
2020-03-30 03:42:04 +00:00
|
|
|
import { Recogniser, Context } from './encoding';
|
|
|
|
|
2020-09-25 11:27:49 +00:00
|
|
|
import loadFs from './fs/node';
|
|
|
|
|
2020-05-07 23:06:28 +00:00
|
|
|
import Utf8 from './encoding/utf8';
|
|
|
|
import * as unicode from './encoding/unicode';
|
|
|
|
import * as mbcs from './encoding/mbcs';
|
|
|
|
import * as sbcs from './encoding/sbcs';
|
|
|
|
import * as iso2022 from './encoding/iso2022';
|
2020-03-30 03:42:04 +00:00
|
|
|
|
|
|
|
/**
 * Complete set of options understood by the file-based detectors.
 */
interface FullOptions {
  /**
   * Number of bytes to read from the file for analysis. When it is
   * missing or 0 the file-based detectors read the whole file instead.
   */
  sampleSize: number;
}
|
|
|
|
|
|
|
|
/** Caller-facing options: every field of `FullOptions` may be omitted. */
type Options = Partial<FullOptions>;
|
|
|
|
|
2020-05-07 23:06:28 +00:00
|
|
|
/**
 * Every encoding recogniser that `analyse` runs against the input.
 *
 * NOTE(review): `analyse` sorts the results by confidence only, so with a
 * stable sort equal-confidence matches keep the order of this list; treat
 * the order as significant unless confirmed otherwise.
 */
const recognisers: Recogniser[] = [
  // Unicode
  new Utf8(),
  new unicode.UTF_16BE(),
  new unicode.UTF_16LE(),
  new unicode.UTF_32BE(),
  new unicode.UTF_32LE(),

  // Multi-byte character sets
  new mbcs.sjis(),
  new mbcs.big5(),
  new mbcs.euc_jp(),
  new mbcs.euc_kr(),
  new mbcs.gb_18030(),

  // ISO-2022 family
  new iso2022.ISO_2022_JP(),
  new iso2022.ISO_2022_KR(),
  new iso2022.ISO_2022_CN(),

  // Single-byte character sets
  new sbcs.ISO_8859_1(),
  new sbcs.ISO_8859_2(),
  new sbcs.ISO_8859_5(),
  new sbcs.ISO_8859_6(),
  new sbcs.ISO_8859_7(),
  new sbcs.ISO_8859_8(),
  new sbcs.ISO_8859_9(),
  new sbcs.windows_1251(),
  new sbcs.windows_1256(),
  new sbcs.KOI8_R(),
];
|
|
|
|
|
|
|
|
/**
 * Result type declared for the file-based detectors: a list of matches,
 * a single encoding name, or `null`.
 */
type DetectResult = Match[] | string | null;
|
|
|
|
|
2020-06-24 04:54:43 +00:00
|
|
|
/**
 * Returns the name of the highest-confidence encoding match for `buffer`.
 *
 * @param buffer Raw bytes to inspect.
 * @returns The `name` of the first match reported by `analyse`, or `null`
 *          when `analyse` reports no match at all.
 */
export const detect = (buffer: Uint8Array): string | null => {
  const ranked: Match[] = analyse(buffer);

  // No recogniser produced a match.
  if (ranked.length === 0) {
    return null;
  }

  // `analyse` returns matches ordered by descending confidence.
  return ranked[0].name;
};
|
|
|
|
|
2020-06-24 04:54:43 +00:00
|
|
|
/**
 * Runs every registered recogniser over `buffer` and returns the matches
 * they produce, ordered by descending confidence.
 *
 * @param buffer Raw bytes to inspect.
 * @returns The truthy results of the recognisers, highest confidence first.
 */
export const analyse = (buffer: Uint8Array): Match[] => {
  const totalBytes = buffer.length;

  // Histogram of byte values: fByteStats[v] is how often byte v occurs.
  const fByteStats: number[] = new Array<number>(256).fill(0);
  for (let pos = 0; pos < totalBytes; pos += 1) {
    fByteStats[buffer[pos] & 0x00ff] += 1;
  }

  // True when at least one byte falls in the 0x80-0x9f range.
  const fC1Bytes = fByteStats.slice(0x80, 0xa0).some((count) => count !== 0);

  const context: Context = {
    fByteStats,
    fC1Bytes,
    fRawInput: buffer,
    fRawLength: totalBytes,
    fInputBytes: buffer,
    fInputLen: totalBytes,
  };

  // Ask each recogniser in turn and keep only the ones that matched.
  const found: Match[] = [];
  for (let idx = 0; idx < recognisers.length; idx += 1) {
    const candidate = recognisers[idx].match(context);
    if (candidate) {
      found.push(candidate as Match);
    }
  }

  // Highest confidence first.
  found.sort((left, right) => right.confidence - left.confidence);

  return found;
};
|
|
|
|
|
|
|
|
/**
 * Asynchronously detects the encoding of the file at `filepath`.
 *
 * When `opts.sampleSize` is a non-zero number, at most that many bytes are
 * read from the file and analysed; otherwise the whole file is read.
 *
 * @param filepath Path of the file to inspect.
 * @param opts     Optional settings; see `sampleSize`.
 * @returns A promise resolving to the result of `detect` on the bytes read.
 *          It rejects when opening, reading or closing the file fails.
 */
export const detectFile = (filepath: string, opts: Options = {}): Promise<DetectResult> =>
  new Promise((resolve, reject) => {
    const fs = loadFs();

    // Only the sampling path opens a descriptor; `undefined` means there is
    // nothing to close. Compared against `undefined` (not truthiness) so a
    // descriptor of 0 is still closed.
    let fd: number | undefined;

    const handler = (err: Error | null | undefined, buffer: Buffer) => {
      let failure = err;

      if (fd !== undefined) {
        try {
          fs.closeSync(fd);
        } catch (closeErr) {
          // Surface a close failure through the promise instead of letting
          // it escape the callback and leave the promise pending. A prior
          // read error takes precedence.
          if (!failure) {
            failure = closeErr as Error;
          }
        }
        fd = undefined;
      }

      if (failure) {
        reject(failure);
      } else {
        resolve(detect(buffer));
      }
    };

    if (opts && opts.sampleSize) {
      // Allocate before opening so that an invalid size throws before a
      // descriptor exists that would then leak.
      const sample: Buffer = Buffer.allocUnsafe(opts.sampleSize);
      fd = fs.openSync(filepath, 'r');

      fs.read(
        fd,
        sample,
        0,
        opts.sampleSize,
        null,
        (err: Error | null | undefined, bytesRead: number) => {
          // `allocUnsafe` memory is uninitialised: analyse only the bytes
          // that were actually read, not the stale tail of the buffer.
          handler(err, err ? sample : sample.subarray(0, bytesRead));
        },
      );
      return;
    }

    fs.readFile(filepath, handler);
  });
|
|
|
|
|
|
|
|
/**
 * Synchronously detects the encoding of the file at `filepath`.
 *
 * When `opts.sampleSize` is a non-zero number, at most that many bytes are
 * read from the file and analysed; otherwise the whole file is read.
 *
 * @param filepath Path of the file to inspect.
 * @param opts     Optional settings; see `sampleSize`.
 * @returns The result of `detect` on the bytes read.
 * @throws Whatever the underlying file-system calls throw.
 */
export const detectFileSync = (filepath: string, opts: Options = {}): DetectResult => {
  const fs = loadFs();

  if (opts && opts.sampleSize) {
    // Allocate before opening so that an invalid size throws before a
    // descriptor exists that would then leak.
    const sample = Buffer.allocUnsafe(opts.sampleSize);
    const fd = fs.openSync(filepath, 'r');

    let bytesRead = 0;
    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Always release the descriptor, even when the read throws.
      fs.closeSync(fd);
    }

    // `allocUnsafe` memory is uninitialised: analyse only the bytes that
    // were actually read, not the stale tail of the buffer.
    return detect(sample.subarray(0, bytesRead));
  }

  return detect(fs.readFileSync(filepath));
};
|
2020-05-07 23:06:28 +00:00
|
|
|
|
|
|
|
/**
 * Default export bundling the public API of this module so it can be
 * consumed as a single object as well as through the named exports.
 */
export default {
  analyse: analyse,
  detect: detect,
  detectFileSync: detectFileSync,
  detectFile: detectFile,
};
|