This commit is contained in:
Dmitry Shirokov 2013-08-12 12:10:06 +10:00
parent 199a24bad6
commit 9fd949edf3
1 changed files with 25 additions and 27 deletions

View File

@ -27,8 +27,8 @@ function binarySearch(arr, searchValue) {
return find(arr, searchValue, 0, arr.length - 1); return find(arr, searchValue, 0, arr.length - 1);
}; };
// "Character" iterated character class. // 'Character' iterated character class.
// Recognizers for specific mbcs encodings make their "characters" available // Recognizers for specific mbcs encodings make their 'characters' available
// by providing a nextChar() function that fills in an instance of iteratedChar // by providing a nextChar() function that fills in an instance of iteratedChar
// with the next char from the input. // with the next char from the input.
// The returned characters are not converted to Unicode, but remain as the raw // The returned characters are not converted to Unicode, but remain as the raw
@ -177,7 +177,7 @@ mbcs.prototype.match = function(det) {
* *
* This function is not a method of class iteratedChar only because * This function is not a method of class iteratedChar only because
* that would require a lot of extra derived classes, which is awkward. * that would require a lot of extra derived classes, which is awkward.
* @param it The iteratedChar "struct" into which the returned char is placed. * @param it The iteratedChar 'struct' into which the returned char is placed.
* @param det The charset detector, which is needed to get at the input byte data * @param det The charset detector, which is needed to get at the input byte data
* being iterated over. * being iterated over.
* @return True if a character was returned, false at end of input. * @return True if a character was returned, false at end of input.
@ -192,10 +192,10 @@ mbcs.prototype.nextChar = function(iter, det) {};
*/ */
module.exports.sjis = function() { module.exports.sjis = function() {
this.name = function() { this.name = function() {
return "Shift-JIS"; return 'Shift-JIS';
}; };
this.language = function() { this.language = function() {
return "ja"; return 'ja';
}; };
// TODO: This set of data comes from the character frequency- // TODO: This set of data comes from the character frequency-
@ -216,18 +216,16 @@ module.exports.sjis = function() {
var firstByte; var firstByte;
firstByte = iter.charValue = iter.nextByte(det); firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0) { if (firstByte < 0)
return false; return false;
}
if (firstByte <= 0x7f || (firstByte>0xa0 && firstByte<=0xdf)) { if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf))
return true; return true;
}
var secondByte = iter.nextByte(det); var secondByte = iter.nextByte(det);
if (secondByte < 0) { if (secondByte < 0)
return false; return false;
}
iter.charValue = (firstByte << 8) | secondByte; iter.charValue = (firstByte << 8) | secondByte;
if (! ((secondByte >= 0x40 && secondByte <= 0x7f) || (secondByte >= 0x80 && secondByte <= 0xff))) { if (! ((secondByte >= 0x40 && secondByte <= 0x7f) || (secondByte >= 0x80 && secondByte <= 0xff))) {
// Illegal second byte value. // Illegal second byte value.
@ -245,10 +243,10 @@ util.inherits(module.exports.sjis, mbcs);
*/ */
module.exports.big5 = function() { module.exports.big5 = function() {
this.name = function() { this.name = function() {
return "Big5"; return 'Big5';
}; };
this.language = function() { this.language = function() {
return "zh"; return 'zh';
}; };
// TODO: This set of data comes from the character frequency- // TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved // of-occurrence analysis tool. The data needs to be moved
@ -300,7 +298,7 @@ util.inherits(module.exports.big5, mbcs);
* and nested derived classes for EUC_KR, EUC_JP, EUC_CN. * and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
* *
* Get the next character value for EUC based encodings. * Get the next character value for EUC based encodings.
* Character "value" is simply the raw bytes that make up the character * Character 'value' is simply the raw bytes that make up the character
* packed into an int. * packed into an int.
*/ */
function eucNextChar(iter, det) { function eucNextChar(iter, det) {
@ -363,10 +361,10 @@ function eucNextChar(iter, det) {
*/ */
module.exports.euc_jp = function() { module.exports.euc_jp = function() {
this.name = function() { this.name = function() {
return "EUC-JP"; return 'EUC-JP';
}; };
this.language = function() { this.language = function() {
return "ja"; return 'ja';
}; };
// TODO: This set of data comes from the character frequency- // TODO: This set of data comes from the character frequency-
@ -397,10 +395,10 @@ util.inherits(module.exports.euc_jp, mbcs);
*/ */
module.exports.euc_kr = function() { module.exports.euc_kr = function() {
this.name = function() { this.name = function() {
return "EUC-KR"; return 'EUC-KR';
}; };
this.language = function() { this.language = function() {
return "ko"; return 'ko';
}; };
// TODO: This set of data comes from the character frequency- // TODO: This set of data comes from the character frequency-
@ -430,15 +428,15 @@ util.inherits(module.exports.euc_kr, mbcs);
*/ */
module.exports.gb_18030 = function() { module.exports.gb_18030 = function() {
this.name = function() { this.name = function() {
return "GB18030"; return 'GB18030';
}; };
this.language = function() { this.language = function() {
return "zh"; return 'zh';
}; };
/* /*
* Get the next character value for the GB18030 encoding. * Get the next character value for the GB18030 encoding.
* Character "value" is simply the raw bytes that make up the character * Character 'value' is simply the raw bytes that make up the character
* packed into an int. * packed into an int.
*/ */
this.nextChar = function(iter, det) { this.nextChar = function(iter, det) {