2013-03-04 19:47:01 +00:00
|
|
|
|
2013-04-29 14:31:48 +00:00
|
|
|
var fs = require('fs');
|
2013-03-04 19:47:01 +00:00
|
|
|
|
2013-11-22 04:37:41 +00:00
|
|
|
var utf8 = require('./encoding/utf8'),
|
|
|
|
unicode = require('./encoding/unicode'),
|
|
|
|
mbcs = require('./encoding/mbcs'),
|
|
|
|
sbcs = require('./encoding/sbcs'),
|
|
|
|
iso2022 = require('./encoding/iso2022');
|
2013-03-04 19:47:01 +00:00
|
|
|
|
2013-04-29 14:31:48 +00:00
|
|
|
// Alias used by the exported functions below to call one another
// (e.g. `self.detect(...)`). NOTE(review): this relies on the statement
// running at the top level of a CommonJS module, where `this` is
// `module.exports` — confirm it is not wrapped in another function.
var self = this;
|
|
|
|
|
2013-03-04 19:47:01 +00:00
|
|
|
/**
 * One recogniser instance per supported encoding. `detect` calls `match` on
 * each entry in this order, so the sequence is kept exactly as it was.
 */
var recognisers = [
  // Unicode encodings
  new utf8(),
  new unicode.UTF_16BE(),
  new unicode.UTF_16LE(),
  new unicode.UTF_32BE(),
  new unicode.UTF_32LE(),

  // Multi-byte character sets
  new mbcs.sjis(),
  new mbcs.big5(),
  new mbcs.euc_jp(),
  new mbcs.euc_kr(),
  new mbcs.gb_18030(),

  // ISO-2022 encodings
  new iso2022.ISO_2022_JP(),
  new iso2022.ISO_2022_KR(),
  new iso2022.ISO_2022_CN(),

  // Single-byte character sets
  new sbcs.ISO_8859_1(),
  new sbcs.ISO_8859_2(),
  new sbcs.ISO_8859_5(),
  new sbcs.ISO_8859_6(),
  new sbcs.ISO_8859_7(),
  new sbcs.ISO_8859_8(),
  new sbcs.ISO_8859_9(),
  new sbcs.windows_1251(),
  new sbcs.windows_1256(),
  new sbcs.KOI8_R()
];
|
|
|
|
|
2018-04-22 07:57:54 +00:00
|
|
|
/**
 * Runs every recogniser against `buffer` and reports the result.
 *
 * @param {Buffer} buffer - Bytes to analyse (indexed access and `length` are used).
 * @param {Object} [opts] - Optional settings.
 * @param {boolean} [opts.returnAllMatches] - When exactly `true`, the full
 *   list of matches is returned instead of a single name.
 * @returns {Array|string|null} With `returnAllMatches === true`, every truthy
 *   recogniser result sorted by descending `confidence`; otherwise the `name`
 *   of the highest-confidence result, or `null` when nothing matched.
 */
module.exports.detect = function(buffer, opts) {
  var total = buffer.length;
  var pos;

  // Histogram: how many times each byte value (0-255) occurs in the input.
  var byteCounts = [];
  for (pos = 0; pos < 256; pos += 1) {
    byteCounts.push(0);
  }
  for (pos = 0; pos < total; pos += 1) {
    byteCounts[buffer[pos] & 0xff] += 1;
  }

  // Flag whether any byte in the 0x80-0x9F range is present.
  var sawC1 = false;
  for (pos = 0x80; pos <= 0x9f && !sawC1; pos += 1) {
    sawC1 = byteCounts[pos] !== 0;
  }

  // Shared input description handed to each recogniser's `match`.
  var context = {
    fByteStats: byteCounts,
    fC1Bytes: sawC1,
    fRawInput: buffer,
    fRawLength: total,
    fInputBytes: buffer,
    fInputLen: total
  };

  // Query each recogniser in list order and keep only the truthy results.
  var matches = [];
  for (pos = 0; pos < recognisers.length; pos += 1) {
    var candidate = recognisers[pos].match(context);
    if (candidate) {
      matches.push(candidate);
    }
  }

  // Highest confidence first.
  matches.sort(function(left, right) {
    return right.confidence - left.confidence;
  });

  if (opts && opts.returnAllMatches === true) {
    return matches;
  }
  if (matches.length > 0) {
    return matches[0].name;
  }
  return null;
};
|
|
|
|
|
2017-10-16 00:42:49 +00:00
|
|
|
/**
 * Asynchronously detects the encoding of a file.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Options passed on to `detect`. May be
 *   omitted, in which case the second argument is taken as the callback.
 * @param {number} [opts.sampleSize] - When truthy, at most this many bytes are
 *   read from the start of the file and analysed instead of the whole file.
 * @param {Function} cb - Node-style callback `(err, result)`; `result` is
 *   whatever `detect` returns, or `null` on error.
 */
module.exports.detectFile = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  var fd;

  var handler = function(err, buffer) {
    // Compare against undefined (not truthiness) so descriptor 0 is closed too.
    if (fd !== undefined) {
      fs.closeSync(fd);
    }

    if (err) return cb(err, null);
    cb(null, self.detect(buffer, opts));
  };

  if (opts && opts.sampleSize) {
    // Declared locally: an undeclared `sample` would be a global shared by
    // overlapping calls (and a ReferenceError in strict mode). Allocated
    // before opening so a bad sampleSize cannot leak a descriptor.
    var sample = Buffer.allocUnsafe(opts.sampleSize);

    // Kept synchronous for backward compatibility: a failure to open still
    // throws to the caller instead of being reported through `cb`.
    fd = fs.openSync(filepath, 'r');

    fs.read(fd, sample, 0, opts.sampleSize, null, function(err, bytesRead) {
      if (err) return handler(err, null);
      // allocUnsafe memory is uninitialised; only analyse the bytes that
      // were actually read when the file is shorter than the sample size.
      handler(null, bytesRead < sample.length ? sample.slice(0, bytesRead) : sample);
    });
    return;
  }

  fs.readFile(filepath, handler);
};
|
|
|
|
|
2017-10-16 00:42:49 +00:00
|
|
|
/**
 * Synchronously detects the encoding of a file.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Options passed on to `detect`.
 * @param {number} [opts.sampleSize] - When truthy, at most this many bytes are
 *   read and analysed instead of the whole file.
 * @returns {*} Whatever `detect` returns for the bytes that were read.
 * @throws Any error raised by the underlying `fs` calls.
 */
module.exports.detectFileSync = function(filepath, opts) {
  if (opts && opts.sampleSize) {
    // Allocate before opening so a bad sampleSize cannot leak a descriptor.
    var sample = Buffer.allocUnsafe(opts.sampleSize);
    var fd = fs.openSync(filepath, 'r');
    var bytesRead;

    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Always release the descriptor, even when the read throws.
      fs.closeSync(fd);
    }

    // allocUnsafe memory is uninitialised; drop the tail that was never
    // filled when the file is shorter than the requested sample.
    if (bytesRead < sample.length) {
      sample = sample.slice(0, bytesRead);
    }
    return self.detect(sample, opts);
  }

  return self.detect(fs.readFileSync(filepath), opts);
};
|
2018-07-01 07:27:14 +00:00
|
|
|
|
|
|
|
// Wrappers around the detection functions above that return every match instead of only the best one
|
|
|
|
/**
 * Like `detect`, but always asks for the full list of matches.
 *
 * @param {Buffer} buffer - Bytes to analyse.
 * @param {Object} [opts] - Extra options forwarded to `detect`; anything that
 *   is not a non-null object is treated as "no options".
 * @returns {*} The result of `detect` called with `returnAllMatches: true`.
 */
module.exports.detectAll = function(buffer, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  return self.detect(buffer, allOpts);
};
|
|
|
|
|
|
|
|
/**
 * Like `detectFile`, but always asks for the full list of matches.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object|Function} [opts] - Extra options forwarded to `detectFile`.
 *   May be omitted, in which case the second argument is the callback.
 *   Anything that is not a non-null object is treated as "no options".
 * @param {Function} cb - Callback handed through to `detectFile`.
 */
module.exports.detectFileAll = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  self.detectFile(filepath, allOpts, cb);
};
|
|
|
|
|
|
|
|
/**
 * Like `detectFileSync`, but always asks for the full list of matches.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @param {Object} [opts] - Extra options forwarded to `detectFileSync`;
 *   anything that is not a non-null object is treated as "no options".
 * @returns {*} The result of `detectFileSync` called with
 *   `returnAllMatches: true`.
 */
module.exports.detectFileAllSync = function(filepath, opts) {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  var given = (opts !== null && typeof opts === 'object') ? opts : {};
  // Work on a copy so the caller's options object is not modified.
  var allOpts = Object.assign({}, given, { returnAllMatches: true });
  return self.detectFileSync(filepath, allOpts);
};
|