@ -15,7 +15,7 @@ module.exports = function(grunt) {
pkg: grunt.file.readJSON('package.json'),
clean: ['dist/filer-test.js', 'dist/filer-issue225.js'],
clean: ['dist/filer-test.js', 'dist/filer-issue225.js', 'dist/filer-perf.js'],
uglify: {
options: {
@ -65,9 +65,9 @@ module.exports = function(grunt) {
exclude: ["./node_modules/request/index.js"]
perf: {
src: "./tests/perf/index.js",
dest: "./tests/perf/filer-perf-test.js",
filerPerf: {
src: "./perf/index.js",
dest: "./dist/filer-perf.js",
options: {
browserifyOptions: {
commondir: false
@ -186,7 +186,7 @@ module.exports = function(grunt) {
gitadd: {
publish: {
files: {
src: ['./dist/filer-test.js']
src: ['./dist/filer-test.js', './dist/filer-perf.js']
@ -204,7 +204,7 @@ module.exports = function(grunt) {
gitrm: {
publish: {
files: {
src: ['./dist/filer-test.js']
src: ['./dist/filer-test.js', './dist/filer-perf.js']
@ -231,7 +231,7 @@ module.exports = function(grunt) {
position: "top"
files: {
src: ['./dist/filer-test.js']
src: ['./dist/filer-test.js', './dist/filer-perf.js']
@ -251,7 +251,7 @@ module.exports = function(grunt) {
grunt.registerTask('develop', ['clean', 'browserify:filerDist', 'browserify:filerIssue225']);
grunt.registerTask('build-tests', ['clean', 'browserify:filerTest']);
grunt.registerTask('build-tests', ['clean', 'browserify:filerTest', 'browserify:filerPerf']);
grunt.registerTask('release', ['test', 'develop', 'uglify']);
grunt.registerTask('publish', 'Publish filer as a new version to NPM, bower and github.', function(patchLevel) {
@ -0,0 +1,13 @@
<meta charset="utf-8" />
<progress id="progress" value=0></progress>
<div id="output"></div>
<div id="stderr"></div>
<script type="text/javascript" src="simple-statistics/src/simple_statistics.js"></script>
<script src="../dist/filer-perf.js"></script>
@ -0,0 +1,120 @@
var Filer = require('..');
var util = require('../tests/lib/test-utils.js');
function setImmediate(cb) {
setTimeout(cb, 0);
function parse_query() {
var query =;
var parsed = {};
query.split('&').forEach(function(pair) {
pair = pair.split('=');
var key = decodeURIComponent(pair[0]);
var value = decodeURIComponent(pair[1]);
parsed[key] = value;
return parsed;
var query = parse_query();
function time(test, cb) {
var start =;
function done() {
var end =;
cb(end - start);
var random_data = new Buffer(1024); // 1kB buffer
var read_buffer = new Buffer(1024);
function run(iter) {
iter = (undefined == iter) ? 0 : iter;
function before() {
util.setup(function() {
function during() {
var fs = util.fs();
time(function(done) {
fs.mkdir('/tmp', function(err) {
fs.stat('/tmp', function(err, stats) {
||||'/tmp/test', 'w', function(err, fd) {
fs.write(fd, random_data, null, null, null, function(err, nbytes) {
fs.close(fd, function(err) {
fs.stat('/tmp/test', function(err, stats) {
||||'/tmp/test', 'r', function(err, fd) {
||||, read_buffer, null, null, null, function(err, nbytes) {
fs.close(fd, function(err) {
fs.unlink('/tmp/test', function(err) {
}, after);
function after(dt) {
util.cleanup(complete.bind(null, iter, dt));
var results = [];
function complete(iter, result) {
if(++iter < iterations) {
setImmediate(run.bind(null, iter));
} else {
progress.value = iter;
function do_stats() {
var output = document.getElementById("output");
var stats = {
mean: ss.mean(results) + " ms",
min: ss.min(results),
max: ss.max(results),
med_abs_dev: ss.median_absolute_deviation(results),
var t = document.createElement("table");
var tbody = document.createElement("tbody");
var keys = Object.keys(stats);
keys.forEach(function(key) {
var row = document.createElement("tr");
var key_cell = document.createElement("td");
var key_cell_text = document.createTextNode(key);
var val_cell = document.createElement("td");
var val_cell_text = document.createTextNode(stats[key]);
var query = parse_query();
var iterations = query.iterations || 10;
var progress = document.getElementById("progress");
progress.max = iterations;
@ -0,0 +1,3 @@
@ -0,0 +1,8 @@
"indent": 4,
"undef": true,
"unused": true,
"globals": {
"require": true
@ -0,0 +1,7 @@
language: node_js
- 0.10
- npm install
- npm test
- npm run cov
@ -0,0 +1,242 @@
Basic contracts of functions:
* Functions do not modify their arguments e.g. change their order
* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`.
# Basic Array Operations
### .mixin(array)
_Optionally_ mix in the following functions into the `Array` prototype. Otherwise
you can use them off of the simple-statistics object itself.
If given a particular array instance as an argument, this adds the functions
only to that array rather than the global `Array.prototype`. Without an argument,
it runs on the global `Array.prototype`.
### .mean(x)
Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_
### .sum(x)
Sum of a single-dimensional Array of numbers.
### .mode(x)
Returns the number that appears most frequently in a single-dimensional Array
of numbers. If there are multiple modes, the one that appears last
is returned.
### .variance(x)
[Variance]( of a single-dimensional Array of numbers.
### .standard_deviation(x)
[Standard Deviation]( of a single-dimensional Array of numbers.
### .sample(array, n)
Return a [simple random sample](
of the given array. The sampling is _without replacement_, and uses a Fisher-Yates
shuffle to randomize.
### .median_absolute_deviation(x)
The Median Absolute Deviation (MAD) is a robust measure of statistical
dispersion. It is more resilient to outliers than the standard deviation.
Accepts a single-dimensional array of numbers and returns a dispersion value.
Also aliased to `.mad(x)` for brevity.
### .median(x)
[Median]( of a single-dimensional array of numbers.
### .geometric_mean(x)
[Geometric mean]( of a single-dimensional array of **positive** numbers.
### .harmonic_mean(x)
[Harmonic mean]( of a single-dimensional array of **positive** numbers.
### .root_mean_square(x)
[Root mean square (RMS)]( of a single-dimensional array of numbers.
Also aliased to `.rms(x)` for brevity.
### .min(x)
Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time.
### .max(x)
Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time.
### .t_test(sample, x)
Does a [student's t-test]('s_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](
### .t_test_two_sample(sample_x, sample_y, difference)
The two-sample t-test is used to compare samples from two populations or groups,
confirming or denying the suspicion (null hypothesis) that the populations are
the same. It returns a t-value that you can then look up to give certain
judgements of confidence based on a t distribution table.
This implementation expects the samples `sample_x` and `sample_y` to be given
as one-dimensional arrays of more than one number each.
### .sample_variance(x)
Produces [sample variance](
of a single-dimensional array of numbers.
### .sample_covariance(a, b)
Produces [sample covariance](
of two single-dimensional arrays of numbers.
### .sample_correlation(a, b)
Produces [sample correlation](
of two single-dimensional arrays of numbers.
### .quantile(sample, p)
Does a [quantile]( of a dataset `sample`,
at p. For those familiar with the `k/q` syntax, `p == k/q`. `sample` must
be a single-dimensional array of numbers. p must be a number greater than or equal to
zero and less than or equal to one, or an array of numbers following that rule.
If an array is given, an array of results will be returned instead of a single number.
### .chunk(sample, chunkSize)
Given a `sample` array, and a positive integer `chunkSize`, splits an array
into chunks of `chunkSize` size and returns an array of those chunks. This
does not change the input value. If the length of `sample` is not divisible
by `chunkSize`, the last array will be shorter than the rest.
### .shuffle(sample)
Given a `sample` array (with any type of contents), return a random permutation
of that array, using the [Fisher-Yates shuffle](
### .shuffle_in_place(sample)
Given a `sample` array (with any type of contents), return a random permutation
of that array, using the [Fisher-Yates shuffle](
This changes the input array in-place, as well as returns it - unlike `.shuffle()`,
it does not create a shallow copy of the array.
### .quantile_sorted(sample, p)
Does a [quantile]( of a dataset `sample`,
at p. `sample` must be a one-dimensional _sorted_ array of numbers, and
`p` must be a single number from zero to one.
### .iqr(sample)
Calculates the [Interquartile range]( of
a sample - the difference between the upper and lower quartiles. Useful
as a measure of dispersion.
_Also available as `.interquartile_range(x)`_
### .sample_skewness(sample)
Calculates the [skewness]( of
a sample, a measure of the extent to which a probability distribution of a
real-valued random variable "leans" to one side of the mean.
The skewness value can be positive or negative, or even undefined.
This implementation uses the [Fisher-Pearson standardized moment coefficient](,
which means that it behaves the same as Excel, Minitab, SAS, and SPSS.
Skewness is only valid for samples of over three values.
### .jenks(data, number_of_classes)
Find the [Jenks Natural Breaks]( for
a single-dimensional array of numbers as input and a desired `number_of_classes`.
The result is a single-dimensional array with class breaks, including the minimum
and maximum of the input array.
### .r_squared(data, function)
Find the [r-squared]( value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`.
var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; });
### .cumulative_std_normal_probability(z)
Look up the given `z` value in a [standard normal table](
to calculate the probability of a random variable appearing with a given value.
### .z_score(x, mean, standard_deviation)
The standard score is the number of standard deviations an observation
or datum is above or below the mean.
### .standard_normal_table
A [standard normal table]( from
which to pull values of Φ (phi).
## Regression
### .linear_regression()
Create a new linear regression solver.
#### .data([[1, 1], [2, 2]])
Set the data of a linear regression. The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`.
#### .line()
Get the linear regression line: this returns a function that you can
give `x` values and it will return `y` values. Internally, this uses the `m()`
and `b()` values and the classic `y = mx + b` equation.
var linear_regression_line = ss.linear_regression()
.data([[0, 1], [2, 2], [3, 3]]).line();
#### .m()
Just get the slope of the fitted regression line, the `m` component of the full
line equation. Returns a number.
#### .b()
Just get the y-intercept of the fitted regression line, the `b` component
of the line equation. Returns a number.
## Classification
### .bayesian()
Create a naïve bayesian classifier.
### .train(item, category)
Train the classifier to classify a certain item, given as an object with keys,
to be in a certain category, given as a string.
### .score(item)
Get the classifications of a certain item, given as an object of
`category -> score` mappings.
var bayes = ss.bayesian();
bayes.train({ species: 'Cat' }, 'animal');
bayes.score({ species: 'Cat' });
// { animal: 1 }
@ -0,0 +1,60 @@
## 0.9.0
* Adds `.sample` for simple random sampling
* Adds `.shuffle` and `.shuffle_in_place` for random permutations
* Adds `.chunk` for splitting arrays into chunked subsets
## 0.8.1
* fixes a bug in `mode` that favored the last new number
## 0.8.0
* `mixin` can now take an array in order to mixin functions into a single array
instance rather than the global Array prototype.
## 0.7.0
* Adds `simple_statistics.harmonic_mean` thanks to [jseppi](
## 0.6.0
* Adds `simple_statistics.quantile_sorted` thanks to [rluta](
* `simple_statistics.quantile` now accepts a sorted list of quantiles as a second argument
* Improved test coverage
## 0.5.0
* Adds `simple_statistics.cumulative_std_normal_probability` by [doronlinder](
* Adds `simple_statistics.z_score` by doronlinder
* Adds `simple_statistics.standard_normal_table`
## 0.4.0
* Adds `simple_statistics.median_absolute_deviation()` by siculars
* Adds `simple_statistics.iqr()` by siculars
* Adds `simple_statistics.skewness()` by Doron Linder
* Lower-level accessors for linear regression allow users to do the line
equation themselves
## 0.3.0
* Adds `simple_statistics.jenks()`
* Adds `simple_statistics.jenksMatrices()`
* Improves test coverage and validation
## 0.2.0
* Adds `simple_statistics.quantile()`
* Adds `simple_statistics.mixin()`
* Adds `simple_statistics.geometric_mean()`
* Adds `simple_statistics.sample_variance()`
* Adds `simple_statistics.sample_covariance()`
## 0.1.0
* Adds `simple_statistics.t_test()`
* Adds `simple_statistics.min()`
* Adds `simple_statistics.max()`
@ -0,0 +1,99 @@
# Contributing to simple-statistics
Simple statistics is a statistics library that can be both used and read.
It should help programmers learn statistics and statisticians learn programming.
In order to achieve this goal, it must be **simple** and **explanatory**.
## Simple
`simple-statistics` is written in a subset of JavaScript. Unused features include:
* [Conditional Operator](
* [ES5 Array methods](
* `with`, `eval`, and other forms of `eval`
* Most micro-optimizations, like [alternative for loop forms](
* [Shortcut branching](
## Explanatory
// # harmonic mean
// a mean function typically used to find the average of rates
// this is the reciprocal of the arithmetic mean of the reciprocals
// of the input numbers
// This runs on `O(n)`, linear time in respect to the array
`simple-statistics` tries to stay away from speaking only in the language of math:
for instance, while JavaScript supports UTF8 characters like π, they are not used
in the source:
* UTF8 in JavaScript on pages without specific meta-tag or Content-Type encodings will fail
* UTF8 can be hard to type, since users need to memorize key combinations or code points
* Mathematical symbols have meanings that are often better communicated by words:
in the form of code, we do not run out of space on the paper, and can afford
to call a variable `reciprocal_sum` instead of `r`.
Every function has a comment that ideally includes:
* The English, long-form name of the method
* What the method does
* What purpose the method typically serves
* A link to a longer description on Wikipedia, Mathematica, or another
web-accessible, non-paywalled source
* The efficiency of the function in terms of Big-O notation, if appropriate
* If the function depends on another function in the library, a note of this, like
`depends on mean()`
## Tests
`simple-statistics` has a testsuite located in `test/spec/`. Each test file
covers a specific topic and tries to test against known values:
* Values produced by trusted statistics software like R or scipy
* Common-sense results
Tests can be run in [node.js]( and are run on every commit
to GitHub by Travis-CI.
To run tests:
npm install
npm test
## Documentation
While the code is meant to be readable, it is not documentation. We maintain
documentation in ``, which has the simple form:
### .geometric_mean(x)
[Geometric mean]( of a single-dimensional array of **positive** numbers.
This file is written in [Markdown]( and
specifies which functions are available, what type of arguments they receive,
what they compute, and what type of answer they return.
## Code Style
We use the [Airbnb style for Javascript]( with
only one difference:
**4 space soft tabs always for Javascript, not 2.**
No aligned `=`, no aligned arguments, spaces are either indents or the 1
space between expressions. No hard tabs.
* All comparisons should be as strict and obvious as possible: prefer `(foo === 0)` to
* Straightforward code is more important than most optimizations.
@ -0,0 +1,13 @@
Copyright (c) 2014, Tom MacWright
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
@ -0,0 +1,7 @@
docco src/*.js
mocha -R spec test/spec/*.js
.PHONY: docs test
@ -0,0 +1,337 @@
[]( [](
A JavaScript implementation of descriptive, regression, and inference statistics.
Implemented in literate JavaScript with no dependencies, designed to work
in all modern browsers (including IE) as well as in node.js.
## [API Documentation](
Basic contracts of functions:
* Functions do not modify their arguments e.g. change their order
* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`.
# Basic Array Operations
### .mixin(array)
_Optionally_ mix in the following functions into the `Array` prototype. Otherwise
you can use them off of the simple-statistics object itself.
If given a particular array instance as an argument, this adds the functions
only to that array rather than the global `Array.prototype`. Without an argument,
it runs on the global `Array.prototype`.
### .mean(x)
Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_
### .sum(x)
Sum of a single-dimensional Array of numbers.
### .mode(x)
Returns the number that appears most frequently in a single-dimensional Array
of numbers. If there are multiple modes, the one that appears last
is returned.
### .variance(x)
[Variance]( of a single-dimensional Array of numbers.
### .standard_deviation(x)
[Standard Deviation]( of a single-dimensional Array of numbers.
### .median_absolute_deviation(x)
The Median Absolute Deviation (MAD) is a robust measure of statistical
dispersion. It is more resilient to outliers than the standard deviation.
Accepts a single-dimensional array of numbers and returns a dispersion value.
Also aliased to `.mad(x)` for brevity.
### .median(x)
[Median]( of a single-dimensional array of numbers.
### .geometric_mean(x)
[Geometric mean]( of a single-dimensional array of **positive** numbers.
### .harmonic_mean(x)
[Harmonic mean]( of a single-dimensional array of **positive** numbers.
### .root_mean_square(x)
[Root mean square (RMS)]( of a single-dimensional array of numbers.
### .min(x)
Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time.
### .max(x)
Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time.
### .t_test(sample, x)
Does a [student's t-test]('s_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](
### .t_test_two_sample(sample_x, sample_y, difference)
The two-sample t-test is used to compare samples from two populations or groups,
confirming or denying the suspicion (null hypothesis) that the populations are
the same. It returns a t-value that you can then look up to give certain
judgements of confidence based on a t distribution table.
This implementation expects the samples `sample_x` and `sample_y` to be given
as one-dimensional arrays of more than one number each.
### .sample_variance(x)
Produces [sample variance](
of a single-dimensional array of numbers.
### .sample_covariance(a, b)
Produces [sample covariance](
of two single-dimensional arrays of numbers.
### .sample_correlation(a, b)
Produces [sample correlation](
of two single-dimensional arrays of numbers.
### .quantile(sample, p)
Does a [quantile]( of a dataset `sample`,
at p. For those familiar with the `k/q` syntax, `p == k/q`. `sample` must
be a single-dimensional array of numbers. p must be a number greater than or equal to zero and less than or equal to one, or an array of numbers following that rule.
If an array is given, an array of results will be returned instead of a single number.
### .chunk(sample, chunkSize)
Given a `sample` array, and a positive integer `chunkSize`, splits an array
into chunks of `chunkSize` size and returns an array of those chunks. This
does not change the input value. If the length of `sample` is not divisible
by `chunkSize`, the last array will be shorter than the rest.
### .quantile_sorted(sample, p)
Does a [quantile]( of a dataset `sample`,
at p. `sample` must be a one-dimensional _sorted_ array of numbers, and
`p` must be a single number greater than or equal to zero and less than or equal to one.
### .iqr(sample)
Calculates the [Interquartile range]( of
a sample - the difference between the upper and lower quartiles. Useful
as a measure of dispersion.
_Also available as `.interquartile_range(x)`_
### .sample_skewness(sample)
Calculates the [skewness]( of
a sample, a measure of the extent to which a probability distribution of a
real-valued random variable "leans" to one side of the mean.
The skewness value can be positive or negative, or even undefined.
This implementation uses the [Fisher-Pearson standardized moment coefficient](,
which means that it behaves the same as Excel, Minitab, SAS, and SPSS.
Skewness is only valid for samples of over three values.
### .jenks(data, number_of_classes)
Find the [Jenks Natural Breaks]( for
a single-dimensional array of numbers as input and a desired `number_of_classes`.
The result is a single-dimensional array with class breaks, including the minimum
and maximum of the input array.
### .r_squared(data, function)
Find the [r-squared]( value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`.
var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; });
### .cumulative_std_normal_probability(z)
Look up the given `z` value in a [standard normal table](
to calculate the probability of a random variable appearing with a given value.
### .z_score(x, mean, standard_deviation)
The standard score is the number of standard deviations an observation
or datum is above or below the mean.
### .standard_normal_table
A [standard normal table]( from
which to pull values of Φ (phi).
## Regression
### .linear_regression()
Create a new linear regression solver.
#### .data([[1, 1], [2, 2]])
Set the data of a linear regression. The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`.
#### .line()
Get the linear regression line: this returns a function that you can
give `x` values and it will return `y` values. Internally, this uses the `m()`
and `b()` values and the classic `y = mx + b` equation.
var linear_regression_line = ss.linear_regression()
.data([[0, 1], [2, 2], [3, 3]]).line();
#### .m()
Just get the slope of the fitted regression line, the `m` component of the full
line equation. Returns a number.
#### .b()
Just get the y-intercept of the fitted regression line, the `b` component
of the line equation. Returns a number.
## Classification
### .bayesian()
Create a naïve bayesian classifier.
### .train(item, category)
Train the classifier to classify a certain item, given as an object with keys,
to be in a certain category, given as a string.
### .score(item)
Get the classifications of a certain item, given as an object of
`category -> score` mappings.
var bayes = ss.bayesian();
bayes.train({ species: 'Cat' }, 'animal');
bayes.score({ species: 'Cat' });
// { animal: 1 }
## [Literate Source](
## Usage
To use it in browsers, grab [simple_statistics.js](
To use it in node, install it with [npm]( or add it to your package.json.
npm install simple-statistics
To use it with [component](,
component install tmcw/simple-statistics
To use it with [bower](,
bower install simple-statistics
## Basic Descriptive Statistics
// Require simple statistics
var ss = require('simple-statistics');
// The input is a simple array
var list = [1, 2, 3];
// Many different descriptive statistics are supported
var sum = ss.sum(list),
mean = ss.mean(list),
min = ss.min(list),
geometric_mean = ss.geometric_mean(list),
max = ss.max(list),
quantile = ss.quantile(list, 0.25);
## Linear Regression
// For a linear regression, it's a two-dimensional array
var data = [ [1, 2], [2, 3] ];
// simple-statistics can produce a linear regression and return
// a friendly javascript function for the line.
var line = ss.linear_regression()
// get a point along the line function
var line = ss.linear_regression()
// Get the r-squared value of the line estimation
ss.r_squared(data, line);
### Bayesian Classifier
var bayes = ss.bayesian();
bayes.train({ species: 'Cat' }, 'animal');
bayes.score({ species: 'Cat' });
// { animal: 1 }
### Mixin Style
_This is **optional** and not used by default. You can opt-in to mixins
with `ss.mixin()`._
This mixes `simple-statistics` methods into the Array prototype - note that
[extending native objects]( is a
tricky move.
This will _only work_ if `defineProperty` is available, which means modern browsers
and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception.
// mixin to Array class
// The input is a simple array
var list = [1, 2, 3];
// The same descriptive techniques as above, but in a simpler style
var sum = list.sum(),
mean = list.mean(),
min = list.min(),
max = list.max(),
quantile = list.quantile(0.25);
## Examples
* [Linear regression with simple-statistics and d3js](
* [Jenks Natural Breaks with a choropleth map with d3js](
# Contributors
* Tom MacWright
* [Matt Sacks](
* Doron Linder
* [Alexander Sicular](
@ -0,0 +1,157 @@
[](
A JavaScript implementation of descriptive, regression, and inference statistics.
Implemented in literate JavaScript with no dependencies, designed to work
in all modern browsers (including IE) as well as in node.js.
# [API](
[Full documentation](
Basic Array Operations
.t_test(sample, x)
.t_test_two_sample(sample_x, sample_y, difference)
.quantile(sample, p)
.jenks(data, number_of_classes)
.r_squared(data, function)
.z_score(x, mean, standard_deviation)
.data([[1, 1], [2, 2]])
.train(item, category)
# [Literate Source](
## Usage
To use it in browsers, grab [simple_statistics.js](
To use it in node, install it with [npm]( or add it to your package.json.
npm install simple-statistics
To use it with [component](,
component install tmcw/simple-statistics
To use it with [bower](,
bower install simple-statistics
## Basic Descriptive Statistics
// Require simple statistics
var ss = require('simple-statistics');
// The input is a simple array
var list = [1, 2, 3];
// Many different descriptive statistics are supported
var sum = ss.sum(list),
mean = ss.mean(list),
min = ss.min(list),
geometric_mean = ss.geometric_mean(list),
max = ss.max(list),
quantile = ss.quantile(list, 0.25);
## Linear Regression
// For a linear regression, it's a two-dimensional array
var data = [ [1, 2], [2, 3] ];
// simple-statistics can produce a linear regression and return
// a friendly javascript function for the line.
var line = ss.linear_regression()
// get a point along the line function
var line = ss.linear_regression()
// Get the r-squared value of the line estimation
ss.r_squared(data, line);
### Bayesian Classifier
var bayes = ss.bayesian();
bayes.train({ species: 'Cat' }, 'animal');
bayes.score({ species: 'Cat' });
// { animal: 1 }
### Mixin Style
_This is **optional** and not used by default. You can opt-in to mixins
with `ss.mixin()`._
This mixes `simple-statistics` methods into the Array prototype - note that
[extending native objects]( is a
tricky move.
This will _only work_ if `defineProperty` is available, which means modern browsers
and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception.
// mixin to Array class
// The input is a simple array
var list = [1, 2, 3];
// The same descriptive techniques as above, but in a simpler style
var sum = list.sum(),
mean = list.mean(),
min = list.min(),
max = list.max(),
quantile = list.quantile(0.25);
## Examples
* [Linear regression with simple-statistics and d3js](
* [Jenks Natural Breaks with a choropleth map with d3js](
# Contributors
* Tom MacWright
* [Matt Sacks](
* Doron Linder
* [Alexander Sicular](
@ -0,0 +1,23 @@
## See Also
* [stream-statistics](, a sister project that implements
many of the same measures for streaming data - as online algorithms
### Javascript
* [science.js](
* [atoll.js](
* [descriptive_statistics](
* [jStat](
* [classifier]( is a naive bayesian classifier (though specialized for the words-spam case)
* [underscore.math](
### Python
* [Pandas](
* [SciPy](
### Their Own Language
* [Julia Language](
* [R language](
@ -0,0 +1,20 @@
var fs = require('fs');
var readme = fs.readFileSync('', 'utf8')
var a = true, b = true;
fs.writeFileSync('', readme.filter(function(f) {
if (f === '---') {
a = !a;
return true;
return a;
}).map(function(f) {
if (f === '---' && b) {
f = f + '\n\n' + fs.readFileSync('', 'utf8') + '\n\n';
b = false;
return f;
@ -0,0 +1,11 @@
"name": "simple-statistics",
"version": "0.9.0",
"description": "Simple Statistics",
"repo": "tmcw/simple-statistics",
"keywords": [],
"license": "ISC",
"dependencies": {},
"development": {},
"main": "src/simple_statistics.js"
@ -0,0 +1,13 @@
"name": "simple-statistics",
"version": "0.9.0",
"description": "Simple Statistics",
"repo": "tmcw/simple-statistics",
"keywords": [],
"license": "ISC",
"dependencies": {},
"development": {},
"scripts": [
@ -0,0 +1,506 @@
/*--------------------- Typography ----------------------------*/
@font-face {
font-family: 'aller-light';
src: url('public/fonts/aller-light.eot');
src: url('public/fonts/aller-light.eot?#iefix') format('embedded-opentype'),
url('public/fonts/aller-light.woff') format('woff'),
url('public/fonts/aller-light.ttf') format('truetype');
font-weight: normal;
font-style: normal;
@font-face {
font-family: 'aller-bold';
src: url('public/fonts/aller-bold.eot');
src: url('public/fonts/aller-bold.eot?#iefix') format('embedded-opentype'),
url('public/fonts/aller-bold.woff') format('woff'),
url('public/fonts/aller-bold.ttf') format('truetype');
font-weight: normal;
font-style: normal;
@font-face {
font-family: 'novecento-bold';
src: url('public/fonts/novecento-bold.eot');
src: url('public/fonts/novecento-bold.eot?#iefix') format('embedded-opentype'),
url('public/fonts/novecento-bold.woff') format('woff'),
url('public/fonts/novecento-bold.ttf') format('truetype');
font-weight: normal;
font-style: normal;
/*--------------------- Layout ----------------------------*/
html { height: 100%; }
body {
font-family: "aller-light";
font-size: 14px;
line-height: 18px;
color: #30404f;
margin: 0; padding: 0;
#container { min-height: 100%; }
a {
color: #000;
b, strong {
font-weight: normal;
font-family: "aller-bold";
p {
margin: 15px 0 0px;
.annotation ul, .annotation ol {
margin: 25px 0;
.annotation ul li, .annotation ol li {
font-size: 14px;
line-height: 18px;
margin: 10px 0;
h1, h2, h3, h4, h5, h6 {
color: #112233;
line-height: 1em;
font-weight: normal;
font-family: "novecento-bold";
text-transform: uppercase;
margin: 30px 0 15px 0;
h1 {
margin-top: 40px;
hr {
border: 0;
background: 1px #ddd;
height: 1px;
margin: 20px 0;
pre, tt, code {
font-size: 12px; line-height: 16px;
font-family: Menlo, Monaco, Consolas, "Lucida Console", monospace;
margin: 0; padding: 0;
.annotation pre {
display: block;
margin: 0;
padding: 7px 10px;
background: #fcfcfc;
-moz-box-shadow: inset 0 0 10px rgba(0,0,0,0.1);
-webkit-box-shadow: inset 0 0 10px rgba(0,0,0,0.1);
box-shadow: inset 0 0 10px rgba(0,0,0,0.1);
overflow-x: auto;
.annotation pre code {
border: 0;
padding: 0;
background: transparent;
blockquote {
border-left: 5px solid #ccc;
margin: 0;
padding: 1px 0 1px 1em;
.sections blockquote p {
font-family: Menlo, Consolas, Monaco, monospace;
font-size: 12px; line-height: 16px;
color: #999;
margin: 10px 0 0;
white-space: pre-wrap;
ul.sections {
list-style: none;
padding:0 0 5px 0;;
Force border-box so that % widths fit the parent
container without overlap because of margin/padding.
More Info :
ul.sections > li > div {
-moz-box-sizing: border-box; /* firefox */
-ms-box-sizing: border-box; /* ie */
-webkit-box-sizing: border-box; /* webkit */
-khtml-box-sizing: border-box; /* konqueror */
box-sizing: border-box; /* css3 */
/*---------------------- Jump Page -----------------------------*/
#jump_to, #jump_page {
margin: 0;
background: white;
-webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777;
-webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px;
font: 16px Arial;
cursor: pointer;
text-align: right;
list-style: none;
#jump_to a {
text-decoration: none;
#jump_to a.large {
display: none;
#jump_to a.small {
font-size: 22px;
font-weight: bold;
color: #676767;
#jump_to, #jump_wrapper {
position: fixed;
right: 0; top: 0;
padding: 10px 15px;
#jump_wrapper {
display: none;
#jump_to:hover #jump_wrapper {
display: block;
#jump_page {
padding: 5px 0 3px;
margin: 0 0 25px 25px;
#jump_page .source {
display: block;
padding: 15px;
text-decoration: none;
border-top: 1px solid #eee;
#jump_page .source:hover {
background: #f5f5ff;
#jump_page .source:first-child {
/*---------------------- Low resolutions (> 320px) ---------------------*/
@media only screen and (min-width: 320px) {
.pilwrap { display: none; }
ul.sections > li > div {
display: block;
padding:5px 10px 0 10px;
ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol {
padding-left: 30px;
ul.sections > li > div.content {
-webkit-box-shadow: inset 0 0 5px #e5e5ee;
box-shadow: inset 0 0 5px #e5e5ee;
border: 1px solid #dedede;
margin:5px 10px 5px 10px;
padding-bottom: 5px;
ul.sections > li > div.annotation pre {
margin: 7px 0 7px;
padding-left: 15px;
ul.sections > li > div.annotation p tt, .annotation code {
background: #f8f8ff;
border: 1px solid #dedede;
font-size: 12px;
padding: 0 0.2em;
/*---------------------- (> 481px) ---------------------*/
@media only screen and (min-width: 481px) {
#container {
position: relative;
body {
background-color: #F5F5FF;
font-size: 15px;
line-height: 21px;
pre, tt, code {
line-height: 18px;
p, ul, ol {
margin: 0 0 15px;
#jump_to {
padding: 5px 10px;
#jump_wrapper {
padding: 0;
#jump_to, #jump_page {
font: 10px Arial;
text-transform: uppercase;
#jump_page .source {
padding: 5px 10px;
#jump_to a.large {
display: inline-block;
#jump_to a.small {
display: none;
#background {
position: absolute;
top: 0; bottom: 0;
width: 350px;
background: #fff;
border-right: 1px solid #e5e5ee;
z-index: -1;
ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol {
padding-left: 40px;
ul.sections > li {
white-space: nowrap;
ul.sections > li > div {
display: inline-block;
ul.sections > li > div.annotation {
max-width: 350px;
min-width: 350px;
min-height: 5px;
padding: 13px;
overflow-x: hidden;
white-space: normal;
vertical-align: top;
text-align: left;
ul.sections > li > div.annotation pre {
margin: 15px 0 15px;
padding-left: 15px;
ul.sections > li > div.content {
padding: 13px;
vertical-align: top;
border: none;
-webkit-box-shadow: none;
box-shadow: none;
.pilwrap {
position: relative;
display: inline;
.pilcrow {
font: 12px Arial;
text-decoration: none;
color: #454545;
position: absolute;
top: 3px; left: -20px;
padding: 1px 2px;
opacity: 0;
-webkit-transition: opacity 0.2s linear;
.for-h1 .pilcrow {
top: 47px;
.for-h2 .pilcrow, .for-h3 .pilcrow, .for-h4 .pilcrow {
top: 35px;
ul.sections > li > div.annotation:hover .pilcrow {
opacity: 1;
/*---------------------- (> 1025px) ---------------------*/
@media only screen and (min-width: 1025px) {
body {
font-size: 16px;
line-height: 24px;
#background {
width: 525px;
ul.sections > li > div.annotation {
max-width: 525px;
min-width: 525px;
padding: 10px 25px 1px 50px;
ul.sections > li > div.content {
padding: 9px 15px 16px 25px;
/*---------------------- Syntax Highlighting -----------------------------*/
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
/* Syntax highlighting style (c) Vasily Polovnyov */
pre code {
display: block; padding: 0.5em;
color: #000;
background: #f8f8ff
pre .hljs-comment,
pre .hljs-template_comment,
pre .hljs-diff .hljs-header,
pre .hljs-javadoc {
color: #408080;
font-style: italic
pre .hljs-keyword,
pre .hljs-assignment,
pre .hljs-literal,
pre .hljs-css .hljs-rule .hljs-keyword,
pre .hljs-winutils,
pre .hljs-javascript .hljs-title,
pre .hljs-lisp .hljs-title,
pre .hljs-subst {
color: #954121;
/*font-weight: bold*/
pre .hljs-number,
pre .hljs-hexcolor {
color: #40a070
pre .hljs-string,
pre .hljs-tag .hljs-value,
pre .hljs-phpdoc,
pre .hljs-tex .hljs-formula {
color: #219161;
pre .hljs-title,
pre .hljs-id {
color: #19469D;
pre .hljs-params {
color: #00F;
pre .hljs-javascript .hljs-title,
pre .hljs-lisp .hljs-title,
pre .hljs-subst {
font-weight: normal
pre .hljs-class .hljs-title,
pre .hljs-haskell .hljs-label,
pre .hljs-tex .hljs-command {
color: #458;
font-weight: bold
pre .hljs-tag,
pre .hljs-tag .hljs-title,
pre .hljs-rules .hljs-property,
pre .hljs-django .hljs-tag .hljs-keyword {
color: #000080;
font-weight: normal
pre .hljs-attribute,
pre .hljs-variable,
pre .hljs-instancevar,
pre .hljs-lisp .hljs-body {
color: #008080
pre .hljs-regexp {
color: #B68
pre .hljs-class {
color: #458;
font-weight: bold
pre .hljs-symbol,
pre .hljs-ruby .hljs-symbol .hljs-string,
pre .hljs-ruby .hljs-symbol .hljs-keyword,
pre .hljs-ruby .hljs-symbol .hljs-keymethods,
pre .hljs-lisp .hljs-keyword,
pre .hljs-tex .hljs-special,
pre .hljs-input_number {
color: #990073
pre .hljs-builtin,
pre .hljs-constructor,
pre .hljs-built_in,
pre .hljs-lisp .hljs-title {
color: #0086b3
pre .hljs-preprocessor,
pre .hljs-pi,
pre .hljs-doctype,
pre .hljs-shebang,
pre .hljs-cdata {
color: #999;
font-weight: bold
pre .hljs-deletion {
background: #fdd
pre .hljs-addition {
background: #dfd
pre .hljs-diff .hljs-change {
background: #0086b3
pre .hljs-chunk {
color: #aaa
pre .hljs-tex .hljs-formula {
opacity: 0.5;
File diff suppressed because it is too large
Load Diff
@ -0,0 +1 @@
<meta http-equiv="refresh" content="0;URL='docs/simple_statistics.html'">
@ -0,0 +1,28 @@
"name": "simple-statistics",
"version": "0.9.0",
"description": "Simple Statistics",
"author": "Tom MacWright <> (",
"repository": {
"type": "git",
"url": "git://"
"dependencies": {},
"devDependencies": {
"jshint": "2.5.3",
"coveralls": "~2.11.1",
"istanbul": "~0.3.0",
"tape": "~2.14.0",
"random-js": "~1.0.4"
"scripts": {
"test": "tape test/*.js",
"cov": "istanbul cover ./node_modules/.bin/tape test/*.js && coveralls < ./coverage/",
"api": "node api.js"
"main": "src/simple_statistics.js",
"engines": {
"node": "*"
"license": "ISC"
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,114 @@
var ss = require('../');
var test = require('tape');
test('bayes', function(t) {
test('makes an easy call with one training round', function(t) {
var bayes = ss.bayesian();
species: 'Cat'
}, 'animal');
species: 'Cat'
}), {
animal: 1
test('makes fify-fifty call', function(t) {
var bayes = ss.bayesian();
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'chair');
species: 'Cat'
}), {
animal: 0.5,
chair: 0.5
test('makes seventy-five/twenty-five call', function(t) {
var bayes = ss.bayesian();
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'chair');
species: 'Cat'
}), {
animal: 0.75,
chair: 0.25
test('tests multiple properties', function(t) {
var bayes = ss.bayesian();
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'animal');
species: 'Cat'
}, 'chair');
species: 'Cat',
color: 'white'
}, 'chair');
color: 'white'
}), {
animal: 0,
chair: 0.2
test('classifies multiple things', function(t) {
var bayes = ss.bayesian();
species: 'Cat'
}, 'animal');
species: 'Dog'
}, 'animal');
species: 'Dog'
}, 'animal');
species: 'Cat'
}, 'chair');
species: 'Cat'
}), {
animal: 0.25,
chair: 0.25
species: 'Dog'
}), {
animal: 0.5,
chair: 0
@ -0,0 +1,17 @@
var test = require('tape');
var ss = require('../');
test('bernoulli_distribution', function(t) {
test('can return generate probability and cumulative probability distributions for p = 0.3', function(t) {
t.equal('object', typeof ss.bernoulli_distribution(0.3));
t.equal(ss.bernoulli_distribution(0.3)[0], 0.7, ss.epsilon);
t.equal(ss.bernoulli_distribution(0.3)[1], 0.3, ss.epsilon);
test('can return null when p is not a valid probability', function(t) {
t.equal(null, ss.bernoulli_distribution(-0.01), 'p should be greater than 0.0');
t.equal(null, ss.bernoulli_distribution(1.5), 'p should be less than 1.0');
@ -0,0 +1,31 @@
var test = require('tape');
var ss = require('../');
// Round `n` to four decimal places: `toFixed(4)` renders the number with four
// fractional digits and `parseFloat` turns that string back into a number, so
// computed probabilities can be compared against four-digit literals.
function rnd(n) {
    return parseFloat(n.toFixed(4));
}
test('binomial_distribution', function(t) {
// Data given in the Wikipedia binomial distribution example, retrieved 29 Mar 2014
// Cumulative probabilities worked by hand to mitigate accumulated rounding errors.
test('can return generate probability and cumulative probability distributions for n = 6, p = 0.3', function(t) {
t.equal('object', typeof ss.binomial_distribution(6, 0.3));
t.equal(rnd(ss.binomial_distribution(6, 0.3)[0]), 0.1176, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[1]), 0.3025, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[2]), 0.3241, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[3]), 0.1852, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[4]), 0.0595, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[5]), 0.0102, ss.epsilon);
t.equal(rnd(ss.binomial_distribution(6, 0.3)[6]), 0.0007, ss.epsilon);
test('can return null when p or n are not valid parameters', function(t) {
t.equal(null, ss.binomial_distribution(0, 0.5), 'n should be strictly positive');
t.equal(null, ss.binomial_distribution(1.5, 0.5), 'n should be an integer');
t.equal(null, ss.binomial_distribution(2, -0.01), 'p should be greater than 0.0');
t.equal(null, ss.binomial_distribution(2, 1.5), 'p should be less than 1.0');
@ -0,0 +1,23 @@
var test = require('tape');
var ss = require('../');
// Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
// "Probability and Statistics in Engineering and Management Science", Wiley (1980).
var data_10_19 = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3
test('chi_squared_goodness_of_fit', function(t) {
test('can reject the null hypothesis with level of confidence specified at 0.05', function(t) {
t.equal(false, ss.chi_squared_goodness_of_fit(data_10_19, ss.poisson_distribution, 0.05));
test('can accept the null hypothesis with level of confidence specified at 0.10', function(t) {
t.equal(true, ss.chi_squared_goodness_of_fit(data_10_19, ss.poisson_distribution, 0.10));
@ -0,0 +1,18 @@
var test = require('tape');
var ss = require('../');
test('chunks', function(t) {
test('can get chunks of an array', function(t) {
t.deepEqual(ss.chunk([1, 2], 1), [[1], [2]]);
t.deepEqual(ss.chunk([1, 2], 2), [[1, 2]]);
t.deepEqual(ss.chunk([1, 2, 3, 4], 4), [[1, 2, 3, 4]]);
t.deepEqual(ss.chunk([1, 2, 3, 4], 2), [[1, 2], [3, 4]]);
t.deepEqual(ss.chunk([1, 2, 3, 4], 3), [[1, 2, 3], [4]]);
t.deepEqual(ss.chunk([1, 2, 3, 4, 5, 6, 7], 2), [[1, 2], [3, 4], [5, 6], [7]]);
t.deepEqual(ss.chunk([], 2), []);
t.deepEqual(ss.chunk([], 0), null);
t.deepEqual(ss.chunk([1, 2], 0), null);
@ -0,0 +1,13 @@
var test = require('tape');
var ss = require('../');
test('cumulative_std_normal_probability', function(t) {
test('wikipedia test example works', function(t) {
for (var i = 0; i < ss.standard_normal_table.length; i++) {
t.equal(ss.cumulative_std_normal_probability(0.4), 0.6554);
@ -0,0 +1,22 @@
var test = require('tape');
var ss = require('../');
test('factorial', function(t) {
test('can return null given a negative number', function(t) {
t.equal(null, ss.factorial(-1));
test('can calculate 0! = 1', function(t) {
t.equal(ss.factorial(0), 1);
test('can calculate 1! = 1', function(t) {
t.equal(ss.factorial(1), 1);
test('can calculate 100! = 1', function(t) {
t.equal(ss.factorial(100), 9.33262154439441e+157);
@ -0,0 +1,23 @@
var test = require('tape');
var ss = require('../');
test('geometric mean', function(t) {
// From
test('can get the mean of two numbers', function(t) {
t.equal(ss.geometric_mean([2, 8]), 4);
t.equal(ss.geometric_mean([4, 1, 1 / 32]), 0.5);
t.equal(Math.round(ss.geometric_mean([2, 32, 1])), 4);
test('returns null for empty lists', function(t) {
t.equal(ss.geometric_mean([]), null);
test('returns null for lists with negative numbers', function(t) {
t.equal(ss.geometric_mean([-1]), null);
@ -0,0 +1,27 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('harmonic_mean', function(t) {
// From
test('can get the mean of two or more numbers', function(t) {
t.equal(ss.harmonic_mean([1, 1]), 1);
t.equal(rnd(ss.harmonic_mean([2, 3])), 2.4);
t.equal(ss.harmonic_mean([1, 2, 4]), 12 / 7);
test('returns null for empty lists', function(t) {
t.equal(ss.harmonic_mean([]), null);
test('returns null for lists with negative numbers', function(t) {
t.equal(ss.harmonic_mean([-1]), null);
@ -0,0 +1,24 @@
var test = require('tape');
var ss = require('../');
test('interquartile range (iqr)', function(t) {
// Data and results from Wikipedia.
test('can get proper iqr of an even-length list', function(t) {
var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20];
t.equal(ss.quantile(even, 0.75) - ss.quantile(even, 0.25), ss.iqr(even));
test('can get proper iqr of an odd-length list', function(t) {
var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20];
t.equal(ss.quantile(odd, 0.75) - ss.quantile(odd, 0.25), ss.iqr(odd));
test('an iqr of a zero-length list produces null', function(t) {
t.equal(ss.iqr([]), null);
@ -0,0 +1,14 @@
var test = require('tape');
var ss = require('../');
test('jenks', function(t) {
test('will not try to assign more classes than datapoints', function(t) {
t.equal(ss.jenks([1, 2], 3), null);
test('assigns correct breaks', function(t) {
t.deepEqual(ss.jenks([1, 2, 4, 5, 7, 9, 10, 20], 3), [1, 2, 5, 20]);
@ -0,0 +1,54 @@
var test = require('tape');
var ss = require('../');
test('linear regression', function(t) {
test('correctly generates a line for a 0, 0 to 1, 1 dataset', function(t) {
var l = ss.linear_regression().data([[0, 0], [1, 1]]);
t.equal(l.line()(0), 0);
t.equal(l.line()(0.5), 0.5);
t.equal(l.line()(1), 1);
test('correctly generates a line for a 0, 0 to 1, 0 dataset', function(t) {
var l = ss.linear_regression().data([[0, 0], [1, 0]]);
t.equal(l.line()(0), 0);
t.equal(l.line()(0.5), 0);
t.equal(l.line()(1), 0);
test('returns the data assigned to it', function(t) {
var l = ss.linear_regression().data([[0, 0], [1, 0]]);
t.deepEqual(, [[0, 0], [1, 0]]);
test('handles a single-point sample', function(t) {
var l = ss.linear_regression().data([[0, 0]]).line();
t.deepEqual(l(10), 0);
test('a straight line will have a slope of 0', function(t) {
var l = ss.linear_regression().data([[0, 0], [1, 0]]);
t.equal(l.m(), 0);
t.equal(l.b(), 0);
test('a line at 50% grade', function(t) {
var l = ss.linear_regression().data([[0, 0], [1, 0.5]]);
t.equal(l.m(), 0.5);
t.equal(l.b(), 0);
test('a line with a high y-intercept', function(t) {
var l = ss.linear_regression().data([[0, 20], [1, 10]]);
t.equal(l.m(), -10);
t.equal(l.b(), 20);
@ -0,0 +1,26 @@
var test = require('tape');
var ss = require('../');
test('median absolute deviation (mad)', function(t) {
test('median absolute deviation of an example on wikipedia', function(t) {
t.equal(ss.mad([1, 1, 2, 2, 4, 6, 9]), 1);
// wolfram alpha: median absolute deviation {0,1,2,3,4,5,6,7,8,9,10}
test('median absolute deviation of 0-10', function(t) {
t.equal(ss.mad([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3);
test('median absolute deviation of one number is zero', function(t) {
t.equal(ss.mad([1]), 0);
test('zero-length corner case', function(t) {
t.equal(ss.mad([]), null);
@ -0,0 +1,18 @@
var test = require('tape');
var ss = require('../');
test('mean', function(t) {
test('can get the mean of two numbers', function(t) {
t.equal(ss.mean([1, 2]), 1.5);
test('can get the mean of one number', function(t) {
t.equal(ss.mean([1]), 1);
test('an empty list has no average', function(t) {
t.equal(ss.mean([]), null);
@ -0,0 +1,38 @@
var test = require('tape');
var ss = require('../');
test('median', function(t) {
test('can get the median of three numbers', function(t) {
t.equal(ss.median([1, 2, 3]), 2);
test('can get the median of two numbers', function(t) {
t.equal(ss.median([1, 2]), 1.5);
test('can get the median of four numbers', function(t) {
t.equal(ss.median([1, 2, 3, 4]), 2.5);
test('gives null for the median of an empty list', function(t) {
t.equal(ss.median([]), null);
test('sorts numbers numerically', function(t) {
t.equal(ss.median([8, 9, 10]), 9);
test('does not change the sorting order of its input', function(t) {
var x = [1, 0];
t.equal(ss.median(x), 0.5);
t.equal(x[0], 1);
t.equal(x[1], 0);
@ -0,0 +1,23 @@
var test = require('tape');
var ss = require('../');
test('min', function(t) {
test('can get the minimum of one number', function(t) {
t.equal(ss.min([1]), 1);
test('can get the minimum of three numbers', function(t) {
t.equal(ss.min([1, 7, -1000]), -1000);
test('max', function(t) {
test('can get the maximum of three numbers', function(t) {
t.equal(ss.max([1, 7, -1000]), 7);
@ -0,0 +1,34 @@
var test = require('tape');
var ss = require('../');
test('mixin', function(t) {
test('can mix into a single array', function(t) {
var even = ss.mixin([2, 4, 6, 8]);
t.equal(even.sum(), 20);
t.equal(even.mean(), 5);
t.equal(even.max(), 8);
t.equal(even.min(), 2);
t.equal(even.sample_skewness(), 0);
test('can mix into Array.prototype', function(t) {
var even = [2, 4, 6, 8];
t.equal(even.sum(), 20);
t.equal(even.mean(), 5);
t.equal(even.max(), 8);
t.equal(even.min(), 2);
t.equal(even.sample_skewness(), 0);
test('mixins can take arguments', function(t) {
var even = [2, 4, 6, 8];
t.equal(even.quantile(0.2), 2);
t.equal(even.quantile(0.8), 8);
@ -0,0 +1,37 @@
var test = require('tape');
var ss = require('../');
test('mode', function(t) {
test('the mode of a single-number array is that one number', function(t) {
t.equal(ss.mode([1]), 1);
test('the mode of a two-number array is that one number', function(t) {
t.equal(ss.mode([1, 1]), 1);
test('other cases', function(t) {
t.equal(ss.mode([1, 1, 2]), 1);
t.equal(ss.mode([1, 1, 2, 3]), 1);
t.equal(ss.mode([1, 1, 2, 3, 3]), 1);
t.equal(ss.mode([1, 1, 2, 3, 3, 3]), 3);
t.equal(ss.mode([1, 1, 2, 2, 2, 2, 3, 3, 3]), 2);
t.equal(ss.mode([1, 2, 3, 4, 5]), 1);
t.equal(ss.mode([1, 2, 3, 4, 5, 5]), 5);
t.equal(ss.mode([1, 1, 1, 2, 2, 3, 3, 4, 4]), 1);
test('the mode of an empty array is null', function(t) {
t.equal(ss.mode([]), null);
test('the mode of a three-number array with two same numbers is the repeated one', function(t) {
t.equal(ss.mode([1, 2, 2]), 2);
@ -0,0 +1,60 @@
var test = require('tape');
var ss = require('../');
test('natural distribution and z-score', function(t) {
test('normal table is exposed in the API', function(t) {
t.equal(ss.standard_normal_table.length, 310);
t.equal(ss.standard_normal_table[0], 0.5);
test('P(Z <= 0.4) is 0.6554', function(t) {
// Taken from the examples of use in
t.equal(ss.cumulative_std_normal_probability(0.4), 0.6554);
test('P(Z <= -1.20) is 0.1151', function(t) {
// Taken from the examples of use in
t.equal(ss.cumulative_std_normal_probability(-1.20), 0.1151);
test('P(X <= 82) when X ~ N (80, 25) is 0.6554', function(t) {
// Taken from the examples of use in
// A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5.
// What is the probability that a student scores an 82 or less?
t.equal(ss.cumulative_std_normal_probability(ss.z_score(82, 80, 5)), 0.6554);
test('P(X >= 90) when X ~ N (80, 25) is 0.0228', function(t) {
// Taken from the examples of use in
// A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5.
// What is the probability that a student scores a 90 or more?
t.equal(+(1 - ss.cumulative_std_normal_probability(ss.z_score(90, 80, 5))).toPrecision(5), 0.0228);
test('P(X <= 74) when X ~ N (80, 25) is 0.1151', function(t) {
// Taken from the examples of use in
// A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5.
// What is the probability that a student scores a 74 or less?
t.equal(ss.cumulative_std_normal_probability(ss.z_score(74, 80, 5)), 0.1151);
test('P(78 <= X <= 88) when X ~ N (80, 25) is 0.6006', function(t) {
// Taken from the examples of use in
// A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5.
// What is the probability that a student scores between 78 and 88?
var prob88 = ss.cumulative_std_normal_probability(ss.z_score(88, 80, 5)),
prob78 = ss.cumulative_std_normal_probability(ss.z_score(78, 80, 5));
t.equal(+(prob88 - prob78).toPrecision(5), 0.6006);
@ -0,0 +1,37 @@
var test = require('tape');
var ss = require('../');
// Round `n` to four decimal places: `toFixed(4)` renders the number with four
// fractional digits and `parseFloat` turns that string back into a number, so
// computed probabilities can be compared against four-digit table values.
function rnd(n) {
    return parseFloat(n.toFixed(4));
}
// expected cumulative probabilities taken from Appendix 1, Table I of William W. Hines & Douglas C.
// Montgomery, "Probability and Statistics in Engineering and Management Science", Wiley (1980).
test('poisson_distribution', function(t) {
test('can return generate probability and cumulative probability distributions for lambda = 3.0', function(t) {
t.equal('object', typeof ss.poisson_distribution(3.0));
t.equal(rnd(ss.poisson_distribution(3.0)[3]), 0.2240, ss.epsilon);
test('can generate probability and cumulative probability distributions for lambda = 4.0', function(t) {
t.equal('object', typeof ss.poisson_distribution(4.0));
t.equal(rnd(ss.poisson_distribution(4.0)[2]), 0.1465, ss.epsilon);
test('can generate probability and cumulative probability distributions for lambda = 5.5', function(t) {
t.equal('object', typeof ss.poisson_distribution(5.5));
t.equal(rnd(ss.poisson_distribution(5.5)[7]), 0.1234, ss.epsilon);
test('can generate probability and cumulative probability distributions for lambda = 9.5', function(t) {
t.equal('object', typeof ss.poisson_distribution(9.5));
t.equal(rnd(ss.poisson_distribution(9.5)[17]), 0.0088, ss.epsilon);
test('can return null when lambda <= 0', function(t) {
t.equal(null, ss.poisson_distribution(0));
t.equal(null, ss.poisson_distribution(-10));
@ -0,0 +1,64 @@
var test = require('tape');
var ss = require('../');
test('quantile', function(t) {
// Data and results from Wikipedia.
test('can get proper quantiles of an even-length list', function(t) {
var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20];
t.equal(ss.quantile(even, 0.25), 7);
t.equal(ss.quantile(even, 0.5), 9);
t.equal(ss.quantile(even, 0.75), 15);
test('can get proper quantiles of an odd-length list', function(t) {
var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20];
t.equal(ss.quantile(odd, 0.25), 7);
t.equal(ss.quantile(odd, 0.5), 9);
t.equal(ss.quantile(odd, 0.75), 15);
test('the median quantile is equal to the median', function(t) {
var rand = [1, 4, 5, 8];
t.equal(ss.quantile(rand, 0.5), ss.median(rand));
var rand2 = [10, 50, 2, 4, 4, 5, 8];
t.equal(ss.quantile(rand2, 0.5), ss.median(rand2));
test('a zero-length list produces null', function(t) {
t.equal(ss.quantile([], 0.5), null);
test('test odd-value case', function(t) {
t.equal(ss.quantile([0, 1, 2, 3, 4], 0.2), 1);
test('bad bounds produce null', function(t) {
t.equal(ss.quantile([1, 2, 3], 1.1), null);
t.equal(ss.quantile([1, 2, 3], -0.5), null);
test('max quantile is equal to the max', function(t) {
t.equal(ss.quantile([1, 2, 3], 1), ss.max([1, 2, 3]));
test('min quantile is equal to the min', function(t) {
t.equal(ss.quantile([1, 2, 3], 0), ss.min([1, 2, 3]));
test('if quantile arg is an array, response is an array of quantiles', function(t) {
var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20];
t.deepEqual(ss.quantile(odd, [0, 0.25, 0.5, 0.75, 1]), [3, 7, 9, 15, 20]);
t.deepEqual(ss.quantile(odd, [0.75, 0.5]), [15, 9]);
@ -0,0 +1,15 @@
var test = require('tape');
var ss = require('../');
test('quantile_sorted', function(t) {
// Data and results from Wikipedia.
test('can get proper quantiles of an even-length list', function(t) {
var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20];
t.equal(ss.quantile_sorted(even, 0.25), 7);
t.equal(ss.quantile_sorted(even, 0.5), 9);
t.equal(ss.quantile_sorted(even, 0.75), 15);
@ -0,0 +1,26 @@
var test = require('tape');
var ss = require('../');
test('r-squared', function(t) {
test('says that the r squared of a two-point line is perfect', function(t) {
var d = [[0, 0], [1, 1]];
var l = ss.linear_regression().data(d);
t.equal(ss.r_squared(d, l.line()), 1);
test('says that the r squared of a three-point line is not perfect', function(t) {
var d = [[0, 0], [0.5, 0.2], [1, 1]];
var l = ss.linear_regression().data(d);
t.notEqual(ss.r_squared(d, l.line()), 1);
test('r-squared of single sample is 1', function(t) {
var d = [[0, 0]];
var l = ss.linear_regression().data(d);
t.equal(ss.r_squared(d, l.line()), 1);
@ -0,0 +1,23 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('root_mean_square', function(t) {
// From
test('can get the RMS of two or more numbers', function(t) {
t.equal(ss.root_mean_square([1, 1]), 1);
t.equal(rnd(ss.root_mean_square([3, 4, 5])), 4.082);
t.equal(rnd(ss.root_mean_square([-0.1, 5, -2, 10])), 5.679);
test('returns null for empty lists', function(t) {
t.equal(ss.root_mean_square([]), null);
@ -0,0 +1,19 @@
var test = require('tape');
var Random = require('random-js');
var random = new Random(Random.engines.mt19937().seed(0));
var ss = require('../');
// Random source handed to the functions under test: each call returns the
// result of `random.real(0, 1)` from the module-level `random` generator.
function rng() {
    var draw = random.real(0, 1);
    return draw;
}
test('sample', function(t) {
t.deepEqual(ss.sample([], 0, rng), [], 'edge case - zero array');
t.deepEqual(ss.sample([], 2, rng), [], 'edge case - zero array');
t.deepEqual(ss.sample([1,2,3], 0, rng, 0), [], 'edge case - zero array');
t.deepEqual(ss.sample([1,2,3], 1, rng), [1], 'edge case - sample of 1');
t.deepEqual(ss.sample([1,2,3], 1, rng), [2]);
t.deepEqual(ss.sample([1,2,3], 3, rng), [2,3,1]);
t.deepEqual(ss.sample([1,2,3,4], 2, rng), [3,1]);
t.deepEqual(ss.sample([1,2,3,4,6,7,8], 2, rng), [8,7]);
t.deepEqual(ss.sample(['foo', 'bar'], 1, rng), ['foo'], 'non-number contents');
@ -0,0 +1,29 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('sample correlation', function(t) {
test('can get the sample correlation of identical arrays', function(t) {
var data = [1, 2, 3, 4, 5, 6];
t.equal(rnd(ss.sample_correlation(data, data)), 1);
test('can get the sample correlation of different arrays', function(t) {
var a = [1, 2, 3, 4, 5, 6];
var b = [2, 2, 3, 4, 5, 60];
t.equal(rnd(ss.sample_correlation(a, b)), 0.691);
test('zero-length corner case', function(t) {
t.equal(rnd(ss.sample_correlation([], [])), 0);
@ -0,0 +1,34 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('sample covariance', function(t) {
test('can get perfect negative covariance', function(t) {
var x = [1, 2, 3, 4, 5, 6];
var y = [6, 5, 4, 3, 2, 1];
t.equal(rnd(ss.sample_covariance(x, y)), -3.5);
test('covariance of something with itself is its variance', function(t) {
var x = [1, 2, 3, 4, 5, 6];
t.equal(rnd(ss.sample_covariance(x, x)), 3.5);
test('covariance is zero for something with no correlation', function(t) {
var x = [1, 2, 3, 4, 5, 6];
var y = [1, 1, 2, 2, 1, 1];
t.equal(rnd(ss.sample_covariance(x, y)), 0);
test('zero-length corner case', function(t) {
t.equal(rnd(ss.sample_covariance([], [])), 0);
@ -0,0 +1,48 @@
var test = require('tape');
var ss = require('../');
test('sample skewness', function(t) {
test('the skewness of an empty sample is null', function(t) {
var data = [];
t.equal(ss.sample_skewness(data), null);
test('the skewness of an sample with one number is null', function(t) {
var data = [1];
t.equal(ss.sample_skewness(data), null);
test('the skewness of an sample with two numbers is null', function(t) {
var data = [1, 2];
t.equal(ss.sample_skewness(data), null);
test('can calculate the skewness of SAS example 1', function(t) {
// Data and answer taken from the SAS SKEWNESS function documentation.
var data = [0, 1, 1];
t.equal(+ss.sample_skewness(data).toPrecision(10), -1.732050808);
test('can calculate the skewness of SAS example 2', function(t) {
// Data and answer taken from the SAS SKEWNESS function documentation.
var data = [2, 4, 6, 3, 1];
t.equal(+ss.sample_skewness(data).toPrecision(10), 0.5901286564);
test('can calculate the skewness of SAS example 3', function(t) {
// Data and answer taken from the SAS SKEWNESS function documentation.
var data = [2, 0, 0];
t.equal(+ss.sample_skewness(data).toPrecision(10), 1.732050808);
@ -0,0 +1,19 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('sample_standard_deviation', function(t) {
test('can get the standard deviation of an example on wikipedia', function(t) {
t.equal(rnd(ss.sample_standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])), 2.138);
test('zero-length corner case', function(t) {
t.equal(rnd(ss.sample_standard_deviation([])), 0);
@ -0,0 +1,38 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('sample variance', function(t) {
test('can get the sample variance of a six-sided die', function(t) {
t.equal(rnd(ss.sample_variance([1, 2, 3, 4, 5, 6])), 3.5);
// confirmed in R
// > var(1:10)
// [1] 9.166667
test('can get the sample variance of numbers 1-10', function(t) {
t.equal(rnd(ss.sample_variance([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), 9.167);
test('the sample variance of two numbers that are the same is 0', function(t) {
t.equal(rnd(ss.sample_variance([1, 1])), 0);
test('the sample variance of one number is null', function(t) {
t.equal(ss.sample_variance([1]), null);
test('the sample variance of no numbers is null', function(t) {
t.equal(ss.sample_variance([]), null);
@ -0,0 +1,24 @@
var test = require('tape');
var Random = require('random-js');
var random = new Random(Random.engines.mt19937().seed(0));
var ss = require('../');
// Random source handed to the shuffle functions under test: each call returns
// the result of `random.real(0, 1)` from the module-level `random` generator.
function rng() {
    var draw = random.real(0, 1);
    return draw;
}
test('shuffle', function(t) {
var input = [1, 2, 3, 4, 5, 6];
t.deepEqual(ss.shuffle([], rng), []);
t.deepEqual(ss.shuffle(input, rng), [1, 5, 3, 2, 4, 6]);
t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array');
t.deepEqual(ss.shuffle(input, rng), [5, 4, 1, 3, 6, 2]);
t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array');
test('shuffle_in_place', function(t) {
var input = [1, 2, 3, 4, 5, 6];
t.deepEqual(ss.shuffle_in_place([], rng), []);
t.deepEqual(ss.shuffle_in_place(input, rng), [6, 1, 5, 2, 4, 3]);
t.deepEqual(input, [6, 1, 5, 2, 4, 3], 'changes original array');
@ -0,0 +1,39 @@
var test = require('tape');
var ss = require('../');
// Round `x` to the nearest thousandth (three decimal places) so that
// floating-point results can be compared against short literal expectations.
function rnd(x) {
    return Math.round(x * 1000) / 1000;
}
test('standard_deviation', function(t) {
test('can get the standard deviation of an example on wikipedia', function(t) {
t.equal(rnd(ss.standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])), 2);
// confirmed with numpy
// In [4]: numpy.std([1,2,3])
// Out[4]: 0.81649658092772603
test('can get the standard deviation of 1-3', function(t) {
t.equal(rnd(ss.standard_deviation([1, 2, 3])), 0.816);
test('zero-length array corner case', function(t) {
t.equal(rnd(ss.standard_deviation([])), 0);
// In [6]: numpy.std([0,1,2,3,4,5,6,7,8,9,10])
// Out[6]: 3.1622776601683795
test('can get the standard deviation of 1-10', function(t) {
t.equal(rnd(ss.standard_deviation([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), 3.162);
test('the standard deviation of one number is zero', function(t) {
t.equal(rnd(ss.standard_deviation([1])), 0);
@ -0,0 +1,14 @@
var test = require('tape');
var ss = require('../');
test('standard_normal_table', function(t) {
test('all entries are numeric', function(t) {
for (var i = 0; i < ss.standard_normal_table.length; i++) {
t.equal(typeof ss.standard_normal_table[i], 'number');
t.ok(ss.standard_normal_table[i] >= 0);
t.ok(ss.standard_normal_table[i] <= 1);
@ -0,0 +1,15 @@
var test = require('tape');
var ss = require('../');
// Test group for ss.sum: a two-element array sums to 3 and an empty array
// sums to 0.
// NOTE(review): the closing lines of these callbacks are not visible in this
// excerpt — confirm against the original file.
test('sum', function(t) {
test('can get the sum of two numbers', function(t) {
t.equal(ss.sum([1, 2]), 3);
test('the sum of no numbers is zero', function(t) {
t.equal(ss.sum([]), 0);
@ -0,0 +1,38 @@
var test = require('tape'),
ss = require('../');
// Test group for ss.t_test (one sample against a known value) and
// ss.t_test_two_sample (two samples, with an optional third argument).
// Expected values are compared with exact equality, not rounded.
// NOTE(review): the closing lines of these callbacks are not visible in this
// excerpt — confirm against the original file.
test('t test', function(t) {
test('can compare a known value to the mean of samples', function(t) {
var res = ss.t_test([1, 2, 3, 4, 5, 6], 3.385);
t.equal(res, 0.1649415480881466);
test('can test independency of two samples', function(t) {
var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6], 0);
t.equal(res, -2.1908902300206643);
test('can test independency of two samples (mu == -2)', function(t) {
var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6], -2);
t.equal(res, 0);
// The third argument is omitted in this case.
test('can test independency of two samples of different lengths', function(t) {
var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6, 1, 2, 0]);
t.equal(res, -0.4165977904505309);
// If either sample (or both) is empty, the result is expected to be null.
test('has an edge case for one sample being of size zero', function(t) {
t.equal(ss.t_test_two_sample([1, 2, 3, 4], []), null);
t.equal(ss.t_test_two_sample([], [1, 2, 3, 4]), null);
t.equal(ss.t_test_two_sample([], []), null);
@ -0,0 +1,24 @@
var test = require('tape');
var ss = require('../');
/**
 * Round a number to three decimal places so floating-point results can be
 * compared against short literal expectations.
 *
 * @param {number} x - Value to round.
 * @returns {number} `x` rounded to the nearest thousandth.
 */
function rnd(x) {
  return Math.round(x * 1000) / 1000;
}
// Test group for ss.variance: the values 1-6 give 2.917 after rounding with
// rnd, a single value gives 0, and an empty array gives null (unrounded).
// NOTE(review): the closing lines of these callbacks are not visible in this
// excerpt — confirm against the original file.
test('variance', function(t) {
test('can get the variance of a six-sided die', function(t) {
t.equal(rnd(ss.variance([1, 2, 3, 4, 5, 6])), 2.917);
test('the variance of one number is zero', function(t) {
t.equal(rnd(ss.variance([1])), 0);
test('the variance of no numbers is null', function(t) {
t.equal(ss.variance([]), null);
@ -1,119 +0,0 @@
/**
 * Schedule a callback to run later by handing it to `setTimeout` with a
 * delay of 0.
 *
 * @param {Function} cb - Callback to schedule.
 */
function setImmediate(cb) {
  setTimeout(cb, 0);
}
// Builds an object from a query string: splits it on '&', splits each pair on
// '=', URI-decodes the key and the value, and stores value under key.
// Returns the resulting object.
// NOTE(review): the right-hand side of `var query =` is missing in this
// excerpt (presumably the page's query string) — confirm against the original
// file. The closing lines of the forEach callback and of the function are not
// visible either.
function parse_query() {
var query =;
var parsed = {};
query.split('&').forEach(function(pair) {
pair = pair.split('=');
var key = decodeURIComponent(pair[0]);
var value = decodeURIComponent(pair[1]);
parsed[key] = value;
return parsed;
// Browser performance harness: loads "Filer" and "util", times a sequence of
// filesystem calls once per iteration, and renders summary statistics in a table.
// NOTE(review): this excerpt is damaged — closing braces, several expressions
// (e.g. the values assigned to `start` and `end`) and some call targets
// (shown as `||||`) are missing. Confirm every detail against the original file.
require(["Filer", "util"], function(Filer, util) {
// Passes the difference `end - start` to cb from inside done().
function time(test, cb) {
var start =;
function done() {
var end =;
cb(end - start);
var random_data = new Uint8Array(1024); // 1kB buffer
var read_buffer = new Uint8Array(1024);
// One iteration of the benchmark; iter defaults to 0 when not supplied.
function run(iter) {
iter = (undefined == iter) ? 0 : iter;
function before() {
util.setup(function() {
// Timed section: nested callbacks over mkdir, stat, write, close, stat, close
// and unlink on '/tmp' and '/tmp/test'.
function during() {
var fs = util.fs();
time(function(done) {
fs.mkdir('/tmp', function(err) {
fs.stat('/tmp', function(err, stats) {
||||'/tmp/test', 'w', function(err, fd) {
fs.write(fd, random_data, null, null, null, function(err, nbytes) {
fs.close(fd, function(err) {
fs.stat('/tmp/test', function(err, stats) {
||||'/tmp/test', 'r', function(err, fd) {
||||, read_buffer, null, null, null, function(err, nbytes) {
fs.close(fd, function(err) {
fs.unlink('/tmp/test', function(err) {
}, after);
// Receives the measured duration and runs util.cleanup, which then calls
// complete(iter, dt).
function after(dt) {
util.cleanup(complete.bind(null, iter, dt));
var results = [];
// Schedules the next run while more iterations remain; the else branch body is
// not visible in this excerpt.
function complete(iter, result) {
if(++iter < iterations) {
nextTick(run.bind(null, iter));
} else {
progress.value = iter;
// Computes mean, min, max and median absolute deviation of `results` via ss and
// creates table/row/cell DOM nodes for each statistic.
function do_stats() {
var output = document.getElementById("output");
var stats = {
mean: ss.mean(results) + " ms",
min: ss.min(results),
max: ss.max(results),
med_abs_dev: ss.median_absolute_deviation(results),
var t = document.createElement("table");
var tbody = document.createElement("tbody");
var keys = Object.keys(stats);
keys.forEach(function(key) {
var row = document.createElement("tr");
var key_cell = document.createElement("td");
var key_cell_text = document.createTextNode(key);
var val_cell = document.createElement("td");
var val_cell_text = document.createTextNode(stats[key]);
// Iteration count comes from the parsed query, falling back to 10; it is also
// used as the progress element's max.
var query = parse_query();
var iterations = query.iterations || 10;
var progress = document.getElementById("progress");
progress.max = iterations;
@ -1,17 +0,0 @@
<!--
  Perf page markup: a progress element plus "output" and "stderr" containers,
  followed by the scripts the page loads.
  NOTE(review): three script tags have an empty src in this excerpt; the
  original URLs appear to have been lost. Confirm against the original file.
-->
<meta charset="utf-8" />
<progress id="progress" value=0></progress>
<div id="output"></div>
<div id="stderr"></div>
<script src="../dist/filer.js"></script>
<script type="text/javascript" src=""></script>
<script type="text/javascript" src=""></script>
<script type="text/javascript" src=""></script>
<script type="text/javascript" src="./index"></script>
<script src="./filer-perf-test.js"></script>
Reference in New Issue