Compare commits
1 Commits
master
...
accept-str
Author | SHA1 | Date |
---|---|---|
Dmitry Shirokov | 2fb68b1f48 |
|
@ -4,3 +4,4 @@ node_modules
|
|||
coverage
|
||||
npm-debug.log
|
||||
lib
|
||||
TODO.md
|
||||
|
|
16
README.md
16
README.md
|
@ -1,8 +1,14 @@
|
|||
# chardet [![Build Status](https://travis-ci.org/runk/node-chardet.png)](https://travis-ci.org/runk/node-chardet)
|
||||
|
||||
Chardet is a character detection module for NodeJS written in pure Javascript.
|
||||
Module is based on ICU project http://site.icu-project.org/, which uses character
|
||||
occurrence analysis to determine the most probable encoding.
|
||||
*Chardet* is a character detection module written in pure JavaScript (TypeScript). The module uses occurrence analysis to determine the most probable encoding.
|
||||
|
||||
- Packed size is only **22 KB**
|
||||
- No dependencies
|
||||
- No native code / bindings
|
||||
- Works in all environments: Node / Browser / Native
|
||||
- Works on all platforms: Linux / Mac / Windows
|
||||
- 100% written in TypeScript
|
||||
- Extensive code coverage
|
||||
|
||||
## Installation
|
||||
|
||||
|
@ -87,3 +93,7 @@ Currently only these encodings are supported.
|
|||
## Typescript?
|
||||
|
||||
Yes. Type definitions are included.
|
||||
|
||||
### References
|
||||
|
||||
- ICU project http://site.icu-project.org/
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"name": "chardet",
|
||||
"version": "0.0.0-development",
|
||||
"homepage": "https://github.com/runk/node-chardet",
|
||||
"description": "Character detector",
|
||||
"description": "Character encoding detector",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
@ -50,7 +50,11 @@
|
|||
"utf8",
|
||||
"detector",
|
||||
"chardet",
|
||||
"icu"
|
||||
"icu",
|
||||
"character detection",
|
||||
"character encoding",
|
||||
"iconv",
|
||||
"iconv-light"
|
||||
],
|
||||
"author": "Dmitry Shirokov <deadrunk@gmail.com>",
|
||||
"contributors": [
|
||||
|
|
|
@ -5,6 +5,8 @@ import fs from 'fs';
|
|||
describe('chardet', () => {
|
||||
|
||||
const path = __dirname + '/test/data/encodings/utf8';
|
||||
const getInput = () => fs.readFileSync(path);
|
||||
|
||||
const expectedEncodingsFromPath = [
|
||||
{ 'confidence': 100, 'name': 'UTF-8', 'lang': undefined },
|
||||
{ 'confidence': 32, 'name': 'windows-1252', 'lang': 'fr' },
|
||||
|
@ -25,8 +27,12 @@ describe('chardet', () => {
|
|||
});
|
||||
|
||||
describe('#detect', () => {
|
||||
it('should detect encoding', () => {
|
||||
expect(chardet.detect(fs.readFileSync(path))).toBe('UTF-8');
|
||||
it('should detect encoding from a buffer', () => {
|
||||
expect(chardet.detect(getInput())).toBe('UTF-8');
|
||||
});
|
||||
|
||||
it('should detect encoding from a string', () => {
|
||||
expect(chardet.detect(getInput().toString('utf-8'))).toBe('UTF-8');
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -54,7 +60,12 @@ describe('chardet', () => {
|
|||
|
||||
describe('#analyse', () => {
|
||||
it('should return a list of encodings, sorted by confidence level in decending order', () => {
|
||||
const matches = chardet.analyse(fs.readFileSync(path));
|
||||
const matches = chardet.analyse(getInput());
|
||||
expect(matches).toEqual(expectedEncodingsFromPath);
|
||||
});
|
||||
|
||||
it('should work for strings as inputs', () => {
|
||||
const matches = chardet.analyse(getInput().toString('utf8'));
|
||||
expect(matches).toEqual(expectedEncodingsFromPath);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -49,13 +49,16 @@ const recognisers: Recogniser[] = [
|
|||
];
|
||||
|
||||
type DetectResult = Match[] | string | null;
|
||||
type InputData = Uint8Array | string;
|
||||
|
||||
export const detect = (buffer: Uint8Array): string | null => {
|
||||
const matches: Match[] = analyse(buffer);
|
||||
export const detect = (input: InputData): string | null => {
|
||||
const matches: Match[] = analyse(input);
|
||||
return matches.length > 0 ? matches[0].name : null;
|
||||
};
|
||||
|
||||
export const analyse = (buffer: Uint8Array): Match[] => {
|
||||
export const analyse = (input: InputData): Match[] => {
|
||||
const buffer = typeof input === 'string' ? Buffer.from(input) : input;
|
||||
|
||||
// Tally up the byte occurrence statistics.
|
||||
const fByteStats = [];
|
||||
for (let i = 0; i < 256; i++) fByteStats[i] = 0;
|
||||
|
|
Loading…
Reference in New Issue