Compare commits

1 commit

master...accept-str

| Author | SHA1 | Date |
|---|---|---|
| Dmitry Shirokov | 2fb68b1f48 | |
.gitignore

@@ -4,3 +4,4 @@ node_modules
 coverage
 npm-debug.log
 lib
+TODO.md
README.md (16 changed lines)

@@ -1,8 +1,14 @@
 # chardet [![Build Status](https://travis-ci.org/runk/node-chardet.png)](https://travis-ci.org/runk/node-chardet)
 
-Chardet is a character detection module for NodeJS written in pure Javascript.
-Module is based on ICU project http://site.icu-project.org/, which uses character
-occurency analysis to determine the most probable encoding.
+*Chardet* is a character detection module written in pure Javascript (Typescript). Module uses occurrence analysis to determine the most probable encoding.
+
+- Packed size is only **22 KB**
+- No dependencies
+- No native code / bindings
+- Works in all environments: Node / Browser / Native
+- Works on all platforms: Linux / Mac / Windows
+- 100% written in Typescript
+- Extensive code coverage
 
 ## Installation
 
@@ -87,3 +93,7 @@ Currently only these encodings are supported.
 ## Typescript?
 
 Yes. Type definitions are included.
+
+### References
+
+- ICU project http://site.icu-project.org/
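The README text above only sketches the public API. As a rough illustration of how the module is typically consumed (the file path is a placeholder; `detect` and `analyse` are the exports shown in the source diff further down):

```ts
import { analyse, detect } from 'chardet';
import fs from 'fs';

// Ask chardet for the single most probable encoding of a chunk of bytes.
const buffer = fs.readFileSync('/path/to/some-file');
console.log(detect(buffer)); // e.g. 'UTF-8'

// analyse() returns every candidate encoding with a confidence score,
// sorted from most to least probable.
console.log(analyse(buffer));
```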
package.json

@@ -2,7 +2,7 @@
   "name": "chardet",
   "version": "0.0.0-development",
   "homepage": "https://github.com/runk/node-chardet",
-  "description": "Character detector",
+  "description": "Character encoding detector",
   "license": "MIT",
   "repository": {
     "type": "git",

@@ -50,7 +50,11 @@
     "utf8",
     "detector",
     "chardet",
-    "icu"
+    "icu",
+    "character detection",
+    "character encoding",
+    "iconv",
+    "iconv-light"
   ],
   "author": "Dmitry Shirokov <deadrunk@gmail.com>",
   "contributors": [
@@ -5,6 +5,8 @@ import fs from 'fs';
 describe('chardet', () => {
 
   const path = __dirname + '/test/data/encodings/utf8';
+  const getInput = () => fs.readFileSync(path);
+
   const expectedEncodingsFromPath = [
     { 'confidence': 100, 'name': 'UTF-8', 'lang': undefined },
     { 'confidence': 32, 'name': 'windows-1252', 'lang': 'fr' },

@@ -25,8 +27,12 @@ describe('chardet', () => {
   });
 
   describe('#detect', () => {
-    it('should detect encoding', () => {
-      expect(chardet.detect(fs.readFileSync(path))).toBe('UTF-8');
+    it('should detect encoding from a buffer', () => {
+      expect(chardet.detect(getInput())).toBe('UTF-8');
+    });
+
+    it('should detect encoding from a string', () => {
+      expect(chardet.detect(getInput().toString('utf-8'))).toBe('UTF-8');
     });
   });
 

@@ -54,7 +60,12 @@ describe('chardet', () => {
 
   describe('#analyse', () => {
     it('should return a list of encodings, sorted by confidence level in decending order', () => {
-      const matches = chardet.analyse(fs.readFileSync(path));
+      const matches = chardet.analyse(getInput());
+      expect(matches).toEqual(expectedEncodingsFromPath);
+    });
+
+    it('should work for strings as inputs', () => {
+      const matches = chardet.analyse(getInput().toString('utf8'));
       expect(matches).toEqual(expectedEncodingsFromPath);
     });
   });
@@ -49,13 +49,16 @@ const recognisers: Recogniser[] = [
 ];
 
 type DetectResult = Match[] | string | null;
+type InputData = Uint8Array | string;
 
-export const detect = (buffer: Uint8Array): string | null => {
-  const matches: Match[] = analyse(buffer);
+export const detect = (input: InputData): string | null => {
+  const matches: Match[] = analyse(input);
   return matches.length > 0 ? matches[0].name : null;
 };
 
-export const analyse = (buffer: Uint8Array): Match[] => {
+export const analyse = (input: InputData): Match[] => {
+  const buffer = typeof input === 'string' ? Buffer.from(input) : input;
+
   // Tally up the byte occurrence statistics.
   const fByteStats = [];
   for (let i = 0; i < 256; i++) fByteStats[i] = 0;
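With this change both `detect` and `analyse` accept either a `Uint8Array`/`Buffer` or a plain string; strings are converted with `Buffer.from(input)` before analysis. A minimal sketch of what that enables for callers (the sample text is arbitrary):

```ts
import { analyse, detect } from 'chardet';

// Buffers keep working exactly as before.
console.log(detect(Buffer.from('café au lait'))); // e.g. 'UTF-8'

// Plain strings are now accepted too; internally they are re-encoded
// via Buffer.from(), which defaults to UTF-8.
console.log(detect('café au lait'));

// analyse() still returns matches ordered by confidence.
console.log(analyse('café au lait')[0]);
```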