diff --git a/gruntfile.js b/gruntfile.js index 8d10d25..a4154f6 100644 --- a/gruntfile.js +++ b/gruntfile.js @@ -15,7 +15,7 @@ module.exports = function(grunt) { grunt.initConfig({ pkg: grunt.file.readJSON('package.json'), - clean: ['dist/filer-test.js', 'dist/filer-issue225.js'], + clean: ['dist/filer-test.js', 'dist/filer-issue225.js', 'dist/filer-perf.js'], uglify: { options: { @@ -65,9 +65,9 @@ module.exports = function(grunt) { exclude: ["./node_modules/request/index.js"] } }, - perf: { - src: "./tests/perf/index.js", - dest: "./tests/perf/filer-perf-test.js", + filerPerf: { + src: "./perf/index.js", + dest: "./dist/filer-perf.js", options: { browserifyOptions: { commondir: false @@ -186,7 +186,7 @@ module.exports = function(grunt) { gitadd: { publish: { files: { - src: ['./dist/filer-test.js'] + src: ['./dist/filer-test.js', './dist/filer-perf.js'] } } }, @@ -204,7 +204,7 @@ module.exports = function(grunt) { gitrm: { publish: { files: { - src: ['./dist/filer-test.js'] + src: ['./dist/filer-test.js', './dist/filer-perf.js'] } } }, @@ -231,7 +231,7 @@ module.exports = function(grunt) { position: "top" }, files: { - src: ['./dist/filer-test.js'] + src: ['./dist/filer-test.js', './dist/filer-perf.js'] } } } @@ -251,7 +251,7 @@ module.exports = function(grunt) { grunt.loadNpmTasks('grunt-banner'); grunt.registerTask('develop', ['clean', 'browserify:filerDist', 'browserify:filerIssue225']); - grunt.registerTask('build-tests', ['clean', 'browserify:filerTest']); + grunt.registerTask('build-tests', ['clean', 'browserify:filerTest', 'browserify:filerPerf']); grunt.registerTask('release', ['test', 'develop', 'uglify']); grunt.registerTask('publish', 'Publish filer as a new version to NPM, bower and github.', function(patchLevel) { diff --git a/perf/index.html b/perf/index.html new file mode 100644 index 0000000..7ca17db --- /dev/null +++ b/perf/index.html @@ -0,0 +1,13 @@ + + + + + + + +
+
+ + + + diff --git a/perf/index.js b/perf/index.js new file mode 100644 index 0000000..f7d9354 --- /dev/null +++ b/perf/index.js @@ -0,0 +1,120 @@ +var Filer = require('..'); +var util = require('../tests/lib/test-utils.js'); + +function setImmediate(cb) { + setTimeout(cb, 0); +} + +function parse_query() { + var query = window.location.search.substring(1); + var parsed = {}; + query.split('&').forEach(function(pair) { + pair = pair.split('='); + var key = decodeURIComponent(pair[0]); + var value = decodeURIComponent(pair[1]); + parsed[key] = value; + }); + return parsed; +} + +var query = parse_query(); + +function time(test, cb) { + var start = performance.now(); + function done() { + var end = performance.now(); + cb(end - start); + } + test(done); +} + +var random_data = new Buffer(1024); // 1kB buffer +var read_buffer = new Buffer(1024); + +function run(iter) { + iter = (undefined == iter) ? 0 : iter; + + function before() { + util.setup(function() { + setImmediate(during); + }); + } + + function during() { + var fs = util.fs(); + + window.crypto.getRandomValues(random_data); + time(function(done) { + fs.mkdir('/tmp', function(err) { + fs.stat('/tmp', function(err, stats) { + fs.open('/tmp/test', 'w', function(err, fd) { + fs.write(fd, random_data, null, null, null, function(err, nbytes) { + fs.close(fd, function(err) { + fs.stat('/tmp/test', function(err, stats) { + fs.open('/tmp/test', 'r', function(err, fd) { + fs.read(fd, read_buffer, null, null, null, function(err, nbytes) { + fs.close(fd, function(err) { + fs.unlink('/tmp/test', function(err) { + done(); + });});});});});});});});});}); + }, after); + } + + function after(dt) { + util.cleanup(complete.bind(null, iter, dt)); + } + + before(); +} + +var results = []; +function complete(iter, result) { + results.push(result); + + if(++iter < iterations) { + setImmediate(run.bind(null, iter)); + } else { + do_stats(); + } + + progress.value = iter; +} + +function do_stats() { + var output = 
document.getElementById("output"); + var stats = { + mean: ss.mean(results) + " ms", + min: ss.min(results), + max: ss.max(results), + med_abs_dev: ss.median_absolute_deviation(results), + }; + + var t = document.createElement("table"); + var tbody = document.createElement("tbody"); + var keys = Object.keys(stats); + keys.forEach(function(key) { + var row = document.createElement("tr"); + + var key_cell = document.createElement("td"); + var key_cell_text = document.createTextNode(key); + key_cell.appendChild(key_cell_text); + row.appendChild(key_cell); + + var val_cell = document.createElement("td"); + var val_cell_text = document.createTextNode(stats[key]); + val_cell.appendChild(val_cell_text); + row.appendChild(val_cell); + + tbody.appendChild(row); + }); + + t.appendChild(tbody); + output.appendChild(t); +} + +var query = parse_query(); +var iterations = query.iterations || 10; +var progress = document.getElementById("progress"); +progress.max = iterations; + +run(); diff --git a/perf/simple-statistics/.gitignore b/perf/simple-statistics/.gitignore new file mode 100644 index 0000000..aa6fd7c --- /dev/null +++ b/perf/simple-statistics/.gitignore @@ -0,0 +1,3 @@ +components +build +node_modules \ No newline at end of file diff --git a/perf/simple-statistics/.jshintrc b/perf/simple-statistics/.jshintrc new file mode 100644 index 0000000..4891ee5 --- /dev/null +++ b/perf/simple-statistics/.jshintrc @@ -0,0 +1,8 @@ +{ + "indent": 4, + "undef": true, + "unused": true, + "globals": { + "require": true + } +} diff --git a/perf/simple-statistics/.travis.yml b/perf/simple-statistics/.travis.yml new file mode 100644 index 0000000..1d867b9 --- /dev/null +++ b/perf/simple-statistics/.travis.yml @@ -0,0 +1,7 @@ +language: node_js +node_js: + - 0.10 +script: + - npm install + - npm test + - npm run cov diff --git a/perf/simple-statistics/API.md b/perf/simple-statistics/API.md new file mode 100644 index 0000000..7936bc6 --- /dev/null +++ b/perf/simple-statistics/API.md @@ -0,0 
+1,242 @@ +Basic contracts of functions: + +* Functions do not modify their arguments e.g. change their order +* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`. + +# Basic Array Operations + +### .mixin(array) + +_Optionally_ mix in the following functions into the `Array` prototype. Otherwise +you can use them off of the simple-statistics object itself. + +If given a particular array instance as an argument, this adds the functions +only to that array rather than the global `Array.prototype`. Without an argument, +it runs on the global `Array.prototype`. + +### .mean(x) + +Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_ + +### .sum(x) + +Sum of a single-dimensional Array of numbers. + +### .mode(x) + +Returns the number that appears most frequently in a single-dimensional Array +of numbers. If there are multiple modes, the one that appears last +is returned. + +### .variance(x) + +[Variance](http://en.wikipedia.org/wiki/Variance) of a single-dimensional Array of numbers. + +### .standard_deviation(x) + +[Standard Deviation](http://en.wikipedia.org/wiki/Standard_deviation) of a single-dimensional Array of numbers. + +### .sample(array, n) + +Return a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample) +of the given array. The sampling is _without replacement_, and uses a Fisher-Yates +sample to randomize. + +### .median_absolute_deviation(x) + +The Median Absolute Deviation (MAD) is a robust measure of statistical +dispersion. It is more resilient to outliers than the standard deviation. +Accepts a single-dimensional array of numbers and returns a dispersion value. + +Also aliased to `.mad(x)` for brevity. + +### .median(x) + +[Median](http://en.wikipedia.org/wiki/Median) of a single-dimensional array of numbers. 
+ +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. + +### .harmonic_mean(x) + +[Harmonic mean](http://en.wikipedia.org/wiki/Harmonic_mean) of a single-dimensional array of **positive** numbers. + +### .root_mean_square(x) + +[Root mean square (RMS)](http://en.wikipedia.org/wiki/Root_mean_square) of a single-dimensional array of numbers. + +Also aliased to `.rms(x)` for brevity. + +### .min(x) + +Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .max(x) + +Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .t_test(sample, x) + +Does a [student's t-test](http://en.wikipedia.org/wiki/Student's_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](http://en.wikipedia.org/wiki/Statistical_significance). + +### .t_test_two_sample(sample_x, sample_y, difference) + +The two-sample t-test is used to compare samples from two populations or groups, +confirming or denying the suspicion (null hypothesis) that the populations are +the same. It returns a t-value that you can then look up to give certain +judgements of confidence based on a t distribution table. + +This implementation expects the samples `sample_x` and `sample_y` to be given +as one-dimensional arrays of more than one number each. + +### .sample_variance(x) + +Produces [sample variance](http://mathworld.wolfram.com/SampleVariance.html) +of a single-dimensional array of numbers. + +### .sample_covariance(a, b) + +Produces [sample covariance](http://en.wikipedia.org/wiki/Sample_mean_and_sample_covariance) +of two single-dimensional arrays of numbers. 
+ +### .sample_correlation(a, b) + +Produces [sample correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) +of two single-dimensional arrays of numbers. + +### .quantile(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. For those familiar with the `k/q` syntax, `p == k/q`. `sample` must +be a single-dimensional array of numbers. p must be a number greater than or equal to +zero and less than or equal to one, or an array of numbers following that rule. +If an array is given, an array of results will be returned instead of a single +number. + +### .chunk(sample, chunkSize) + +Given a `sample` array, and a positive integer `chunkSize`, splits an array +into chunks of `chunkSize` size and returns an array of those chunks. This +does not change the input value. If the length of `sample` is not divisible +by `chunkSize`, the last array will be shorter than the rest. + +### .shuffle(sample) + +Given a `sample` array (with any type of contents), return a random permutation +of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) +algorithm. + +### .shuffle_in_place(sample) + +Given a `sample` array (with any type of contents), return a random permutation +of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) +algorithm. + +This changes the input array in-place, as well as returns it - unlike `.shuffle()`, +it does not create a shallow copy of the array. + +### .quantile_sorted(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. `sample` must be a one-dimensional _sorted_ array of numbers, and +`p` must be a single number from zero to one. + +### .iqr(sample) + +Calculates the [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) of +a sample - the difference between the upper and lower quartiles. Useful +as a measure of dispersion. 
+ +_Also available as `.interquartile_range(x)`_ + +### .sample_skewness(sample) + +Calculates the [skewness](http://en.wikipedia.org/wiki/Skewness) of +a sample, a measure of the extent to which a probability distribution of a +real-valued random variable "leans" to one side of the mean. +The skewness value can be positive or negative, or even undefined. + +This implementation uses the [Fisher-Pearson standardized moment coefficient](http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients), +which means that it behaves the same as Excel, Minitab, SAS, and SPSS. + +Skewness is only valid for samples of over three values. + +### .jenks(data, number_of_classes) + +Find the [Jenks Natural Breaks](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) for +a single-dimensional array of numbers as input and a desired `number_of_classes`. +The result is a single-dimensional array with class breaks, including the minimum +and maximum of the input array. + +### .r_squared(data, function) + +Find the [r-squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`. + + var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; }); + +### .cumulative_std_normal_probability(z) + +Look up the given `z` value in a [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) +to calculate the probability of a random variable appearing with a given value. + +### .z_score(x, mean, standard_deviation) + +The standard score is the number of standard deviations an observation +or datum is above or below the mean. + +### .standard_normal_table + +A [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) from +which to pull values of Φ (phi). + +## Regression + +### .linear_regression() + +Create a new linear regression solver. + +#### .data([[1, 1], [2, 2]]) + +Set the data of a linear regression. 
The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`. + +#### .line() + +Get the linear regression line: this returns a function that you can +give `x` values and it will return `y` values. Internally, this uses the `m()` +and `b()` values and the classic `y = mx + b` equation. + + var linear_regression_line = ss.linear_regression() + .data([[0, 1], [2, 2], [3, 3]]).line(); + linear_regression_line(5); + +#### .m() + +Just get the slope of the fitted regression line, the `m` component of the full +line equation. Returns a number. + +#### .b() + +Just get the y-intercept of the fitted regression line, the `b` component +of the line equation. Returns a number. + +## Classification + +### .bayesian() + +Create a naïve bayesian classifier. + +### .train(item, category) + +Train the classifier to classify a certain item, given as an object with keys, +to be in a certain category, given as a string. + +### .score(item) + +Get the classifications of a certain item, given as an object of +`category -> score` mappings. + + var bayes = ss.bayesian(); + bayes.train({ species: 'Cat' }, 'animal'); + bayes.score({ species: 'Cat' }); + // { animal: 1 } diff --git a/perf/simple-statistics/CHANGELOG.md b/perf/simple-statistics/CHANGELOG.md new file mode 100644 index 0000000..f354935 --- /dev/null +++ b/perf/simple-statistics/CHANGELOG.md @@ -0,0 +1,60 @@ +# CHANGELOG + +## 0.9.0 + +* Adds `.sample` for simple random sampling +* Adds `.shuffle` and `.shuffle_in_place` for random permutations +* Adds `.chunk` for splitting arrays into chunked subsets + +## 0.8.1 + +* fixes a bug in `mode` that favored the last new number + +## 0.8.0 + +* `mixin` can now take an array in order to mixin functions into a single array + instance rather than the global Array prototype. 
+ +## 0.7.0 + +* Adds `simple_statistics.harmonic_mean` thanks to [jseppi](https://github.com/jseppi) + +## 0.6.0 + +* Adds `simple_statistics.quantile_sorted` thanks to [rluta](http://github.com/rluta) +* `simple_statistics.quantile` now accepts a sorted list of quantiles as a second argument +* Improved test coverage + +## 0.5.0 + +* Adds `simple_statistics.cumulative_std_normal_probability` by [doronlinder](https://github.com/doronlinder) +* Adds `simple_statistics.z_score` by doronlinder +* Adds `simple_statistics.standard_normal_table` + +## 0.4.0 + +* Adds `simple_statistics.median_absolute_deviation()` by siculars +* Adds `simple_statistics.iqr()` by siculars +* Adds `simple_statistics.skewness()` by Doron Linder +* Lower-level accessors for linear regression allow users to do the line + equation themselves + +## 0.3.0 + +* Adds `simple_statistics.jenks()` +* Adds `simple_statistics.jenksMatrices()` +* Improves test coverage and validation + +## 0.2.0 + +* Adds `simple_statistics.quantile()` +* Adds `simple_statistics.mixin()` +* Adds `simple_statistics.geometric_mean()` +* Adds `simple_statistics.sample_variance()` +* Adds `simple_statistics.sample_covariance()` + +## 0.1.0 + +* Adds `simple_statistics.t_test()` +* Adds `simple_statistics.min()` +* Adds `simple_statistics.max()` diff --git a/perf/simple-statistics/CONTRIBUTING.md b/perf/simple-statistics/CONTRIBUTING.md new file mode 100644 index 0000000..eebdf1f --- /dev/null +++ b/perf/simple-statistics/CONTRIBUTING.md @@ -0,0 +1,99 @@ +# Contributing to simple-statistics + +Simple statistics is a statistics library that can be both used and read. +It should help programmers learn statistics and statisticians learn programming. +In order to achieve this goal, it must be **simple** and **explanatory**. + +## Simple + +`simple-statistics` is written in a subset of JavaScript. 
Unused features +include: + +* [Conditional Operator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_Operator) +* [ES5 Array methods](http://ie.microsoft.com/TestDrive/HTML5/ECMAScript5Array/Default.html) +* `with`, `eval`, and other forms of `eval` +* Most micro-optimizations, like [alternative for loop forms](http://jsperf.com/loops/70) +* [Shortcut branching](http://javascriptweblog.wordpress.com/2010/07/26/no-more-ifs-alternatives-to-statement-branching-in-javascript/) + +## Explanatory + +Example: + +```js +// # harmonic mean +// +// a mean function typically used to find the average of rates +// +// this is the reciprocal of the arithmetic mean of the reciprocals +// of the input numbers +// +// This runs on `O(n)`, linear time in respect to the array +``` + +`simple-statistics` tries to stay away from speaking only in the language of math: +for instance, while JavaScript supports UTF8 characters like π, they are not used +in the source: + +* UTF8 in JavaScript on pages without specific meta-tag or Content-Type encodings will fail +* UTF8 can be hard to type, since users need to memorize key combinations or code points +* Mathematical symbols have meanings that are often better communicated by words: + in the form of code, we do not run out of space on the paper, and can afford + to call a variable `reciprocal_sum` instead of `r`. + +Every function has a comment that ideally includes: + +* The English, long-form name of the method +* What the method does +* What purpose the method typically serves +* A link to a longer description on Wikipedia, Mathematica, or another + web-accessible, non-paywalled source +* The efficiency of the function in terms of Big-O notation, if appropriate +* If the function depends on another function in the library, a note of this, like + `depends on mean()` + +## Tests + +`simple-statistics` has a testsuite located in `test/spec/`. 
Each test file +covers a specific topic and tries to test against known values: + +* Values produced by trusted statistics software like R or scipy +* Common-sense results + +Tests can be run in [node.js](http://nodejs.org/) and are run on every commit +to GitHub by Travis-CI. + +To run tests: + +```sh +npm install +npm test +``` + +## Documentation + +While the code is meant to be readable, it is not documentation. We maintain +documentation in `API.md`, which has the simple form: + +```md +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. +``` + +This file is written in [Markdown](https://daringfireball.net/projects/markdown/) and +specifies which functions are available, what type of arguments they receive, +what they compute, and what type of answer they return. + +## Code Style + +We use the [Airbnb style for Javascript](https://github.com/airbnb/javascript) with +only one difference: + +**4 space soft tabs always for Javascript, not 2.** + +No aligned `=`, no aligned arguments, spaces are either indents or the 1 +space between expressions. No hard tabs. + +* All comparisons should be as strict and obvious as possible: prefer `(foo === 0)` to + `(!foo)`. +* Straightforward code is more important than most optimizations. diff --git a/perf/simple-statistics/LICENSE b/perf/simple-statistics/LICENSE new file mode 100644 index 0000000..13a2f78 --- /dev/null +++ b/perf/simple-statistics/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014, Tom MacWright + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/perf/simple-statistics/Makefile b/perf/simple-statistics/Makefile new file mode 100644 index 0000000..f497307 --- /dev/null +++ b/perf/simple-statistics/Makefile @@ -0,0 +1,7 @@ +docs: + docco src/*.js + +test: + mocha -R spec test/spec/*.js + +.PHONY: docs test diff --git a/perf/simple-statistics/README.md b/perf/simple-statistics/README.md new file mode 100644 index 0000000..f3587ba --- /dev/null +++ b/perf/simple-statistics/README.md @@ -0,0 +1,337 @@ +[![Build Status](https://secure.travis-ci.org/tmcw/simple-statistics.png?branch=master)](http://travis-ci.org/tmcw/simple-statistics) [![Coverage Status](https://coveralls.io/repos/tmcw/simple-statistics/badge.png)](https://coveralls.io/r/tmcw/simple-statistics) + +A JavaScript implementation of descriptive, regression, and inference statistics. + +Implemented in literate JavaScript with no dependencies, designed to work +in all modern browsers (including IE) as well as in node.js. + +## [API Documentation](API.md) + +--- + +Basic contracts of functions: + +* Functions do not modify their arguments e.g. change their order +* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`. + +# Basic Array Operations + +### .mixin(array) + +_Optionally_ mix in the following functions into the `Array` prototype. Otherwise +you can use them off of the simple-statistics object itself. + +If given a particular array instance as an argument, this adds the functions +only to that array rather than the global `Array.prototype`. Without an argument, +it runs on the global `Array.prototype`. 
+ +### .mean(x) + +Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_ + +### .sum(x) + +Sum of a single-dimensional Array of numbers. + +### .mode(x) + +Returns the number that appears most frequently in a single-dimensional Array +of numbers. If there are multiple modes, the one that appears last +is returned. + +### .variance(x) + +[Variance](http://en.wikipedia.org/wiki/Variance) of a single-dimensional Array of numbers. + +### .standard_deviation(x) + +[Standard Deviation](http://en.wikipedia.org/wiki/Standard_deviation) of a single-dimensional Array of numbers. + +### .median_absolute_deviation(x) + +The Median Absolute Deviation (MAD) is a robust measure of statistical +dispersion. It is more resilient to outliers than the standard deviation. +Accepts a single-dimensional array of numbers and returns a dispersion value. + +Also aliased to `.mad(x)` for brevity. + +### .median(x) + +[Median](http://en.wikipedia.org/wiki/Median) of a single-dimensional array of numbers. + +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. + +### .harmonic_mean(x) + +[Harmonic mean](http://en.wikipedia.org/wiki/Harmonic_mean) of a single-dimensional array of **positive** numbers. + +### .root_mean_square(x) + +[Root mean square (RMS)](http://en.wikipedia.org/wiki/Root_mean_square) of a single-dimensional array of numbers. + +### .min(x) + +Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .max(x) + +Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .t_test(sample, x) + +Does a [student's t-test](http://en.wikipedia.org/wiki/Student's_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](http://en.wikipedia.org/wiki/Statistical_significance). 
+ +### .t_test_two_sample(sample_x, sample_y, difference) + +The two-sample t-test is used to compare samples from two populations or groups, +confirming or denying the suspicion (null hypothesis) that the populations are +the same. It returns a t-value that you can then look up to give certain +judgements of confidence based on a t distribution table. + +This implementation expects the samples `sample_x` and `sample_y` to be given +as one-dimensional arrays of more than one number each. + +### .sample_variance(x) + +Produces [sample variance](http://mathworld.wolfram.com/SampleVariance.html) +of a single-dimensional array of numbers. + +### .sample_covariance(a, b) + +Produces [sample covariance](http://en.wikipedia.org/wiki/Sample_mean_and_sample_covariance) +of two single-dimensional arrays of numbers. + +### .sample_correlation(a, b) + +Produces [sample correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) +of two single-dimensional arrays of numbers. + +### .quantile(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. For those familiar with the `k/q` syntax, `p == k/q`. `sample` must +be a single-dimensional array of numbers. p must be a number greater than or equal to zero and less than or equal to one, or an array of numbers following that rule. +If an array is given, an array of results will be returned instead of a single +number. + +### .chunk(sample, chunkSize) + +Given a `sample` array, and a positive integer `chunkSize`, splits an array +into chunks of `chunkSize` size and returns an array of those chunks. This +does not change the input value. If the length of `sample` is not divisible +by `chunkSize`, the last array will be shorter than the rest. + +### .quantile_sorted(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. 
`sample` must be a one-dimensional _sorted_ array of numbers, and +`p` must be a single number greater than or equal to zero and less than or equal to one. + +### .iqr(sample) + +Calculates the [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) of +a sample - the difference between the upper and lower quartiles. Useful +as a measure of dispersion. + +_Also available as `.interquartile_range(x)`_ + +### .sample_skewness(sample) + +Calculates the [skewness](http://en.wikipedia.org/wiki/Skewness) of +a sample, a measure of the extent to which a probability distribution of a +real-valued random variable "leans" to one side of the mean. +The skewness value can be positive or negative, or even undefined. + +This implementation uses the [Fisher-Pearson standardized moment coefficient](http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients), +which means that it behaves the same as Excel, Minitab, SAS, and SPSS. + +Skewness is only valid for samples of over three values. + +### .jenks(data, number_of_classes) + +Find the [Jenks Natural Breaks](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) for +a single-dimensional array of numbers as input and a desired `number_of_classes`. +The result is a single-dimensional array with class breaks, including the minimum +and maximum of the input array. + +### .r_squared(data, function) + +Find the [r-squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`. + + var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; }); + +### .cumulative_std_normal_probability(z) + +Look up the given `z` value in a [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) +to calculate the probability of a random variable appearing with a given value. 
+ +### .z_score(x, mean, standard_deviation) + +The standard score is the number of standard deviations an observation +or datum is above or below the mean. + +### .standard_normal_table + +A [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) from +which to pull values of Φ (phi). + +## Regression + +### .linear_regression() + +Create a new linear regression solver. + +#### .data([[1, 1], [2, 2]]) + +Set the data of a linear regression. The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`. + +#### .line() + +Get the linear regression line: this returns a function that you can +give `x` values and it will return `y` values. Internally, this uses the `m()` +and `b()` values and the classic `y = mx + b` equation. + + var linear_regression_line = ss.linear_regression() + .data([[0, 1], [2, 2], [3, 3]]).line(); + linear_regression_line(5); + +#### .m() + +Just get the slope of the fitted regression line, the `m` component of the full +line equation. Returns a number. + +#### .b() + +Just get the y-intercept of the fitted regression line, the `b` component +of the line equation. Returns a number. + +## Classification + +### .bayesian() + +Create a naïve bayesian classifier. + +### .train(item, category) + +Train the classifier to classify a certain item, given as an object with keys, +to be in a certain category, given as a string. + +### .score(item) + +Get the classifications of a certain item, given as an object of +`category -> score` mappings. + + var bayes = ss.bayesian(); + bayes.train({ species: 'Cat' }, 'animal'); + bayes.score({ species: 'Cat' }); + // { animal: 1 } + + + +--- + +## [Literate Source](http://macwright.org/simple-statistics/) + +## Usage + +To use it in browsers, grab [simple_statistics.js](https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js). +To use it in node, install it with [npm](https://npmjs.org/) or add it to your package.json. 
+ + npm install simple-statistics + +To use it with [component](https://github.com/component/component), + + component install tmcw/simple-statistics + +To use it with [bower](http://bower.io/), + + bower install simple-statistics + +## Basic Descriptive Statistics + +```javascript +// Require simple statistics +var ss = require('simple-statistics'); + +// The input is a simple array +var list = [1, 2, 3]; + +// Many different descriptive statistics are supported +var sum = ss.sum(list), + mean = ss.mean(list), + min = ss.min(list), + geometric_mean = ss.geometric_mean(list), + max = ss.max(list), + quantile = ss.quantile(0.25); +``` + +## Linear Regression + +```javascript +// For a linear regression, it's a two-dimensional array +var data = [ [1, 2], [2, 3] ]; + +// simple-statistics can produce a linear regression and return +// a friendly javascript function for the line. +var line = ss.linear_regression() + .data(data) + .line(); + +// get a point along the line function +line(0); + +var line = ss.linear_regression() + +// Get the r-squared value of the line estimation +ss.r_squared(data, line); +``` + +### Bayesian Classifier + +```javascript +var bayes = ss.bayesian(); +bayes.train({ species: 'Cat' }, 'animal'); +bayes.score({ species: 'Cat' }); +// { animal: 1 } +``` + +### Mixin Style + +_This is **optional** and not used by default. You can opt-in to mixins +with `ss.mixin()`._ + +This mixes `simple-statistics` methods into the Array prototype - note that +[extending native objects](http://perfectionkills.com/extending-native-builtins/) is a +tricky move. + +This will _only work_ if `defineProperty` is available, which means modern browsers +and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception. 
+ +```javascript +// mixin to Array class +ss.mixin(); + +// The input is a simple array +var list = [1, 2, 3]; + +// The same descriptive techniques as above, but in a simpler style +var sum = list.sum(), + mean = list.mean(), + min = list.min(), + max = list.max(), + quantile = list.quantile(0.25); +``` + +## Examples + +* [Linear regression with simple-statistics and d3js](http://bl.ocks.org/3931800) +* [Jenks Natural Breaks with a choropleth map with d3js](http://bl.ocks.org/tmcw/4969184) + +# Contributors + +* Tom MacWright +* [Matt Sacks](https://github.com/mattsacks) +* Doron Linder +* [Alexander Sicular](https://github.com/siculars) diff --git a/perf/simple-statistics/README.test.md b/perf/simple-statistics/README.test.md new file mode 100644 index 0000000..5734feb --- /dev/null +++ b/perf/simple-statistics/README.test.md @@ -0,0 +1,157 @@ +[![Build Status](https://secure.travis-ci.org/tmcw/simple-statistics.png?branch=master)](http://travis-ci.org/tmcw/simple-statistics) + +A JavaScript implementation of descriptive, regression, and inference statistics. + +Implemented in literate JavaScript with no dependencies, designed to work +in all modern browsers (including IE) as well as in node.js. 
+ +# [API](API.md) + +[Full documentation](API.md) + +--- +``` + + Basic Array Operations + .mixin() + .mean(x) + .sum(x) + .variance(x) + .standard_deviation(x) + .median_absolute_deviation(x) + .median(x) + .geometric_mean(x) + .harmonic_mean(x) + .root_mean_square(x) + .min(x) + .max(x) + .t_test(sample, x) + .t_test_two_sample(sample_x, sample_y, difference) + .sample_variance(x) + .sample_covariance(x) + .sample_correlation(x) + .quantile(sample, p) + .iqr(sample) + .sample_skewness(sample) + .jenks(data, number_of_classes) + .r_squared(data, function) + .cumulative_std_normal_probability(z) + .z_score(x, mean, standard_deviation) + .standard_normal_table + Regression + .linear_regression() + .data([[1, 1], [2, 2]]) + .line() + .m() + .b() + Classification + .bayesian() + .train(item, category) + .score(item) +``` + +--- + +# [Literate Source](http://macwright.org/simple-statistics/) + +## Usage + +To use it in browsers, grab [simple_statistics.js](https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js). +To use it in node, install it with [npm](https://npmjs.org/) or add it to your package.json. 
+ + npm install simple-statistics + +To use it with [component](https://github.com/component/component), + + component install tmcw/simple-statistics + +To use it with [bower](http://bower.io/), + + bower install simple-statistics + +## Basic Descriptive Statistics + +```javascript +// Require simple statistics +var ss = require('simple-statistics'); + +// The input is a simple array +var list = [1, 2, 3]; + +// Many different descriptive statistics are supported +var sum = ss.sum(list), + mean = ss.mean(list), + min = ss.min(list), + geometric_mean = ss.geometric_mean(list), + max = ss.max(list), + quantile = ss.quantile(list, 0.25); +``` + +## Linear Regression + +```javascript +// For a linear regression, it's a two-dimensional array +var data = [ [1, 2], [2, 3] ]; + +// simple-statistics can produce a linear regression and return +// a friendly javascript function for the line. +var line = ss.linear_regression() + .data(data) + .line(); + +// get a point along the line function +line(0); + +// Get the r-squared value of the line estimation +ss.r_squared(data, line); +``` + +### Bayesian Classifier + +```javascript +var bayes = ss.bayesian(); +bayes.train({ species: 'Cat' }, 'animal'); +bayes.score({ species: 'Cat' }); +// { animal: 1 } +``` + +### Mixin Style + +_This is **optional** and not used by default. You can opt-in to mixins +with `ss.mixin()`._ + +This mixes `simple-statistics` methods into the Array prototype - note that +[extending native objects](http://perfectionkills.com/extending-built-in-native-objects-evil-or-not/) is a +tricky move. + +This will _only work_ if `defineProperty` is available, which means modern browsers +and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception. 
+ +```javascript +// mixin to Array class +ss.mixin(); + +// The input is a simple array +var list = [1, 2, 3]; + +// The same descriptive techniques as above, but in a simpler style +var sum = list.sum(), + mean = list.mean(), + min = list.min(), + max = list.max(), + quantile = list.quantile(0.25); +``` + +## Examples + +* [Linear regression with simple-statistics and d3js](http://bl.ocks.org/3931800) +* [Jenks Natural Breaks with a choropleth map with d3js](http://bl.ocks.org/tmcw/4969184) + +# Contributors + +* Tom MacWright +* [Matt Sacks](https://github.com/mattsacks) +* Doron Linder +* [Alexander Sicular](https://github.com/siculars) diff --git a/perf/simple-statistics/SEEALSO.md b/perf/simple-statistics/SEEALSO.md new file mode 100644 index 0000000..c4699ba --- /dev/null +++ b/perf/simple-statistics/SEEALSO.md @@ -0,0 +1,23 @@ +## See Also + +* [stream-statistics](https://github.com/tmcw/stream-statistics), a sister project that implements + many of the same measures for streaming data - as online algorithms + +### Javascript + +* [science.js](https://github.com/jasondavies/science.js) +* [atoll.js](https://github.com/nsfmc/atoll.js) +* [descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) +* [jStat](http://www.jstat.org/) +* [classifier](https://github.com/harthur/classifier) is a naive bayesian classifier (though specialized for the words-spam case) +* [underscore.math](https://github.com/syntagmatic/underscore.math/blob/master/underscore.math.js) + +### Python + +* [Pandas](http://pandas.pydata.org/) +* [SciPy](http://www.scipy.org/) + +### Their Own Language + +* [Julia Language](http://julialang.org/) +* [R language](http://www.r-project.org/) diff --git a/perf/simple-statistics/api.js b/perf/simple-statistics/api.js new file mode 100644 index 0000000..531e3e1 --- /dev/null +++ b/perf/simple-statistics/api.js @@ -0,0 +1,20 @@ +var fs = require('fs'); + +var readme = fs.readFileSync('README.md', 'utf8') + .split('\n'); + 
+var a = true, b = true; + +fs.writeFileSync('README.md', readme.filter(function(f) { + if (f === '---') { + a = !a; + return true; + } + return a; +}).map(function(f) { + if (f === '---' && b) { + f = f + '\n\n' + fs.readFileSync('API.md', 'utf8') + '\n\n'; + b = false; + } + return f; +}).join('\n')); diff --git a/perf/simple-statistics/bower.json b/perf/simple-statistics/bower.json new file mode 100644 index 0000000..b4349a1 --- /dev/null +++ b/perf/simple-statistics/bower.json @@ -0,0 +1,11 @@ +{ + "name": "simple-statistics", + "version": "0.9.0", + "description": "Simple Statistics", + "repo": "tmcw/simple-statistics", + "keywords": [], + "license": "ISC", + "dependencies": {}, + "development": {}, + "main": "src/simple_statistics.js" +} \ No newline at end of file diff --git a/perf/simple-statistics/component.json b/perf/simple-statistics/component.json new file mode 100644 index 0000000..7973639 --- /dev/null +++ b/perf/simple-statistics/component.json @@ -0,0 +1,13 @@ +{ + "name": "simple-statistics", + "version": "0.9.0", + "description": "Simple Statistics", + "repo": "tmcw/simple-statistics", + "keywords": [], + "license": "ISC", + "dependencies": {}, + "development": {}, + "scripts": [ + "src/simple_statistics.js" + ] +} \ No newline at end of file diff --git a/perf/simple-statistics/docs/docco.css b/perf/simple-statistics/docs/docco.css new file mode 100644 index 0000000..a2899ac --- /dev/null +++ b/perf/simple-statistics/docs/docco.css @@ -0,0 +1,506 @@ +/*--------------------- Typography ----------------------------*/ + +@font-face { + font-family: 'aller-light'; + src: url('public/fonts/aller-light.eot'); + src: url('public/fonts/aller-light.eot?#iefix') format('embedded-opentype'), + url('public/fonts/aller-light.woff') format('woff'), + url('public/fonts/aller-light.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'aller-bold'; + src: url('public/fonts/aller-bold.eot'); + src: 
url('public/fonts/aller-bold.eot?#iefix') format('embedded-opentype'), + url('public/fonts/aller-bold.woff') format('woff'), + url('public/fonts/aller-bold.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'novecento-bold'; + src: url('public/fonts/novecento-bold.eot'); + src: url('public/fonts/novecento-bold.eot?#iefix') format('embedded-opentype'), + url('public/fonts/novecento-bold.woff') format('woff'), + url('public/fonts/novecento-bold.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +/*--------------------- Layout ----------------------------*/ +html { height: 100%; } +body { + font-family: "aller-light"; + font-size: 14px; + line-height: 18px; + color: #30404f; + margin: 0; padding: 0; + height:100%; +} +#container { min-height: 100%; } + +a { + color: #000; +} + +b, strong { + font-weight: normal; + font-family: "aller-bold"; +} + +p { + margin: 15px 0 0px; +} + .annotation ul, .annotation ol { + margin: 25px 0; + } + .annotation ul li, .annotation ol li { + font-size: 14px; + line-height: 18px; + margin: 10px 0; + } + +h1, h2, h3, h4, h5, h6 { + color: #112233; + line-height: 1em; + font-weight: normal; + font-family: "novecento-bold"; + text-transform: uppercase; + margin: 30px 0 15px 0; +} + +h1 { + margin-top: 40px; +} + +hr { + border: 0; + background: 1px #ddd; + height: 1px; + margin: 20px 0; +} + +pre, tt, code { + font-size: 12px; line-height: 16px; + font-family: Menlo, Monaco, Consolas, "Lucida Console", monospace; + margin: 0; padding: 0; +} + .annotation pre { + display: block; + margin: 0; + padding: 7px 10px; + background: #fcfcfc; + -moz-box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + -webkit-box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + overflow-x: auto; + } + .annotation pre code { + border: 0; + padding: 0; + background: transparent; + } + + +blockquote { + border-left: 5px solid #ccc; + margin: 0; + padding: 1px 
0 1px 1em; +} + .sections blockquote p { + font-family: Menlo, Consolas, Monaco, monospace; + font-size: 12px; line-height: 16px; + color: #999; + margin: 10px 0 0; + white-space: pre-wrap; + } + +ul.sections { + list-style: none; + padding:0 0 5px 0;; + margin:0; +} + +/* + Force border-box so that % widths fit the parent + container without overlap because of margin/padding. + + More Info : http://www.quirksmode.org/css/box.html +*/ +ul.sections > li > div { + -moz-box-sizing: border-box; /* firefox */ + -ms-box-sizing: border-box; /* ie */ + -webkit-box-sizing: border-box; /* webkit */ + -khtml-box-sizing: border-box; /* konqueror */ + box-sizing: border-box; /* css3 */ +} + + +/*---------------------- Jump Page -----------------------------*/ +#jump_to, #jump_page { + margin: 0; + background: white; + -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; + -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; + font: 16px Arial; + cursor: pointer; + text-align: right; + list-style: none; +} + +#jump_to a { + text-decoration: none; +} + +#jump_to a.large { + display: none; +} +#jump_to a.small { + font-size: 22px; + font-weight: bold; + color: #676767; +} + +#jump_to, #jump_wrapper { + position: fixed; + right: 0; top: 0; + padding: 10px 15px; + margin:0; +} + +#jump_wrapper { + display: none; + padding:0; +} + +#jump_to:hover #jump_wrapper { + display: block; +} + +#jump_page { + padding: 5px 0 3px; + margin: 0 0 25px 25px; +} + +#jump_page .source { + display: block; + padding: 15px; + text-decoration: none; + border-top: 1px solid #eee; +} + +#jump_page .source:hover { + background: #f5f5ff; +} + +#jump_page .source:first-child { +} + +/*---------------------- Low resolutions (> 320px) ---------------------*/ +@media only screen and (min-width: 320px) { + .pilwrap { display: none; } + + ul.sections > li > div { + display: block; + padding:5px 10px 0 10px; + } + + ul.sections > li > div.annotation ul, ul.sections > li > 
div.annotation ol { + padding-left: 30px; + } + + ul.sections > li > div.content { + overflow-x:auto; + -webkit-box-shadow: inset 0 0 5px #e5e5ee; + box-shadow: inset 0 0 5px #e5e5ee; + border: 1px solid #dedede; + margin:5px 10px 5px 10px; + padding-bottom: 5px; + } + + ul.sections > li > div.annotation pre { + margin: 7px 0 7px; + padding-left: 15px; + } + + ul.sections > li > div.annotation p tt, .annotation code { + background: #f8f8ff; + border: 1px solid #dedede; + font-size: 12px; + padding: 0 0.2em; + } +} + +/*---------------------- (> 481px) ---------------------*/ +@media only screen and (min-width: 481px) { + #container { + position: relative; + } + body { + background-color: #F5F5FF; + font-size: 15px; + line-height: 21px; + } + pre, tt, code { + line-height: 18px; + } + p, ul, ol { + margin: 0 0 15px; + } + + + #jump_to { + padding: 5px 10px; + } + #jump_wrapper { + padding: 0; + } + #jump_to, #jump_page { + font: 10px Arial; + text-transform: uppercase; + } + #jump_page .source { + padding: 5px 10px; + } + #jump_to a.large { + display: inline-block; + } + #jump_to a.small { + display: none; + } + + + + #background { + position: absolute; + top: 0; bottom: 0; + width: 350px; + background: #fff; + border-right: 1px solid #e5e5ee; + z-index: -1; + } + + ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol { + padding-left: 40px; + } + + ul.sections > li { + white-space: nowrap; + } + + ul.sections > li > div { + display: inline-block; + } + + ul.sections > li > div.annotation { + max-width: 350px; + min-width: 350px; + min-height: 5px; + padding: 13px; + overflow-x: hidden; + white-space: normal; + vertical-align: top; + text-align: left; + } + ul.sections > li > div.annotation pre { + margin: 15px 0 15px; + padding-left: 15px; + } + + ul.sections > li > div.content { + padding: 13px; + vertical-align: top; + border: none; + -webkit-box-shadow: none; + box-shadow: none; + } + + .pilwrap { + position: relative; + display: inline; + 
} + + .pilcrow { + font: 12px Arial; + text-decoration: none; + color: #454545; + position: absolute; + top: 3px; left: -20px; + padding: 1px 2px; + opacity: 0; + -webkit-transition: opacity 0.2s linear; + } + .for-h1 .pilcrow { + top: 47px; + } + .for-h2 .pilcrow, .for-h3 .pilcrow, .for-h4 .pilcrow { + top: 35px; + } + + ul.sections > li > div.annotation:hover .pilcrow { + opacity: 1; + } +} + +/*---------------------- (> 1025px) ---------------------*/ +@media only screen and (min-width: 1025px) { + + body { + font-size: 16px; + line-height: 24px; + } + + #background { + width: 525px; + } + ul.sections > li > div.annotation { + max-width: 525px; + min-width: 525px; + padding: 10px 25px 1px 50px; + } + ul.sections > li > div.content { + padding: 9px 15px 16px 25px; + } +} + +/*---------------------- Syntax Highlighting -----------------------------*/ + +td.linenos { background-color: #f0f0f0; padding-right: 10px; } +span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } +/* + +github.com style (c) Vasily Polovnyov + +*/ + +pre code { + display: block; padding: 0.5em; + color: #000; + background: #f8f8ff +} + +pre .hljs-comment, +pre .hljs-template_comment, +pre .hljs-diff .hljs-header, +pre .hljs-javadoc { + color: #408080; + font-style: italic +} + +pre .hljs-keyword, +pre .hljs-assignment, +pre .hljs-literal, +pre .hljs-css .hljs-rule .hljs-keyword, +pre .hljs-winutils, +pre .hljs-javascript .hljs-title, +pre .hljs-lisp .hljs-title, +pre .hljs-subst { + color: #954121; + /*font-weight: bold*/ +} + +pre .hljs-number, +pre .hljs-hexcolor { + color: #40a070 +} + +pre .hljs-string, +pre .hljs-tag .hljs-value, +pre .hljs-phpdoc, +pre .hljs-tex .hljs-formula { + color: #219161; +} + +pre .hljs-title, +pre .hljs-id { + color: #19469D; +} +pre .hljs-params { + color: #00F; +} + +pre .hljs-javascript .hljs-title, +pre .hljs-lisp .hljs-title, +pre .hljs-subst { + font-weight: normal +} + +pre .hljs-class .hljs-title, +pre .hljs-haskell .hljs-label, +pre 
.hljs-tex .hljs-command { + color: #458; + font-weight: bold +} + +pre .hljs-tag, +pre .hljs-tag .hljs-title, +pre .hljs-rules .hljs-property, +pre .hljs-django .hljs-tag .hljs-keyword { + color: #000080; + font-weight: normal +} + +pre .hljs-attribute, +pre .hljs-variable, +pre .hljs-instancevar, +pre .hljs-lisp .hljs-body { + color: #008080 +} + +pre .hljs-regexp { + color: #B68 +} + +pre .hljs-class { + color: #458; + font-weight: bold +} + +pre .hljs-symbol, +pre .hljs-ruby .hljs-symbol .hljs-string, +pre .hljs-ruby .hljs-symbol .hljs-keyword, +pre .hljs-ruby .hljs-symbol .hljs-keymethods, +pre .hljs-lisp .hljs-keyword, +pre .hljs-tex .hljs-special, +pre .hljs-input_number { + color: #990073 +} + +pre .hljs-builtin, +pre .hljs-constructor, +pre .hljs-built_in, +pre .hljs-lisp .hljs-title { + color: #0086b3 +} + +pre .hljs-preprocessor, +pre .hljs-pi, +pre .hljs-doctype, +pre .hljs-shebang, +pre .hljs-cdata { + color: #999; + font-weight: bold +} + +pre .hljs-deletion { + background: #fdd +} + +pre .hljs-addition { + background: #dfd +} + +pre .hljs-diff .hljs-change { + background: #0086b3 +} + +pre .hljs-chunk { + color: #aaa +} + +pre .hljs-tex .hljs-formula { + opacity: 0.5; +} diff --git a/perf/simple-statistics/docs/simple_statistics.html b/perf/simple-statistics/docs/simple_statistics.html new file mode 100644 index 0000000..31523c1 --- /dev/null +++ b/perf/simple-statistics/docs/simple_statistics.html @@ -0,0 +1,4198 @@ + + + + + simple_statistics.js + + + + + +
+
+ + +
+ + diff --git a/perf/simple-statistics/index.html b/perf/simple-statistics/index.html new file mode 100644 index 0000000..6ecad59 --- /dev/null +++ b/perf/simple-statistics/index.html @@ -0,0 +1 @@ + diff --git a/perf/simple-statistics/package.json b/perf/simple-statistics/package.json new file mode 100644 index 0000000..91436a7 --- /dev/null +++ b/perf/simple-statistics/package.json @@ -0,0 +1,28 @@ +{ + "name": "simple-statistics", + "version": "0.9.0", + "description": "Simple Statistics", + "author": "Tom MacWright (http://macwright.org/)", + "repository": { + "type": "git", + "url": "git://github.com/tmcw/simple-statistics.git" + }, + "dependencies": {}, + "devDependencies": { + "jshint": "2.5.3", + "coveralls": "~2.11.1", + "istanbul": "~0.3.0", + "tape": "~2.14.0", + "random-js": "~1.0.4" + }, + "scripts": { + "test": "tape test/*.js", + "cov": "istanbul cover ./node_modules/.bin/tape test/*.js && coveralls < ./coverage/lcov.info", + "api": "node api.js" + }, + "main": "src/simple_statistics.js", + "engines": { + "node": "*" + }, + "license": "ISC" +} diff --git a/perf/simple-statistics/src/simple_statistics.js b/perf/simple-statistics/src/simple_statistics.js new file mode 100644 index 0000000..28e1827 --- /dev/null +++ b/perf/simple-statistics/src/simple_statistics.js @@ -0,0 +1,1541 @@ +/* global module */ +// # simple-statistics +// +// A simple, literate statistics system. The code below uses the +// [Javascript module pattern](http://www.adequatelygood.com/2010/3/JavaScript-Module-Pattern-In-Depth), +// eventually assigning `simple-statistics` to `ss` in browsers or the +// `exports` object for node.js +(function() { + var ss = {}; + + if (typeof module !== 'undefined') { + // Assign the `ss` object to exports, so that you can require + // it in [node.js](http://nodejs.org/) + module.exports = ss; + } else { + // Otherwise, in a browser, we assign `ss` to the window object, + // so you can simply refer to it as `ss`. 
+ this.ss = ss; + } + + // # [Linear Regression](http://en.wikipedia.org/wiki/Linear_regression) + // + // [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression) + // is a simple way to find a fitted line + // between a set of coordinates. + function linear_regression() { + var linreg = {}, + data = []; + + // Assign data to the model. Data is assumed to be an array of + // `[x, y]` pairs. Called with no arguments this returns the + // current data instead; otherwise it stores a shallow copy and + // returns the model so calls can be chained. + linreg.data = function(x) { + if (!arguments.length) return data; + data = x.slice(); + return linreg; + }; + + // Calculate the slope and y-intercept of the regression line + // by calculating the least sum of squares + linreg.mb = function() { + var m, b; + + // Store data length in a local variable to reduce + // repeated object property lookups + var data_length = data.length; + + //if there's only one point, arbitrarily choose a slope of 0 + //and a y-intercept of whatever the y of the initial point is + if (data_length === 1) { + m = 0; + b = data[0][1]; + } else { + // Initialize our sums and scope the `m` and `b` + // variables that define the line. + var sum_x = 0, sum_y = 0, + sum_xx = 0, sum_xy = 0; + + // Use local variables to grab point values + // with minimal object property lookups + var point, x, y; + + // Gather the sum of all x values, the sum of all + // y values, and the sum of x^2 and (x*y) for each + // value. + // + // In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy + for (var i = 0; i < data_length; i++) { + point = data[i]; + x = point[0]; + y = point[1]; + + sum_x += x; + sum_y += y; + + sum_xx += x * x; + sum_xy += x * y; + } + + // `m` is the slope of the regression line. With no data, or when + // every x is equal, the denominator is mathematically zero, so + // `m` (and therefore `b`) comes out as NaN or infinite. + m = ((data_length * sum_xy) - (sum_x * sum_y)) / + ((data_length * sum_xx) - (sum_x * sum_x)); + + // `b` is the y-intercept of the line. + b = (sum_y / data_length) - ((m * sum_x) / data_length); + } + + // Return both values as an object. 
+ return { m: m, b: b }; + }; + + // a shortcut for simply getting the slope of the regression line + linreg.m = function() { + return linreg.mb().m; + }; + + // a shortcut for simply getting the y-intercept of the regression + // line. + linreg.b = function() { + return linreg.mb().b; + }; + + // ## Fitting The Regression Line + // + // This is called after `.data()` and returns the + // equation `y = f(x)` which gives the position + // of the regression line at each point in `x`. + linreg.line = function() { + + // Get the slope, `m`, and y-intercept, `b`, of the line. + var mb = linreg.mb(), + m = mb.m, + b = mb.b; + + // Return a function that computes a `y` value for each + // x value it is given, based on the values of `m` and `b` + // that we just computed. + return function(x) { + return b + (m * x); + }; + }; + + return linreg; + } + + // # [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) + // + // The r-squared value of data compared with a function `f` + // is one minus the ratio of the residual sum of squares (the squared + // differences between the prediction and the actual value) to the + // total sum of squares. Fewer than two points always yields 1. + function r_squared(data, f) { + if (data.length < 2) return 1; + + // Compute the average y value for the actual + // data set in order to compute the + // _total sum of squares_ + var sum = 0, average; + for (var i = 0; i < data.length; i++) { + sum += data[i][1]; + } + average = sum / data.length; + + // Compute the total sum of squares - the + // squared difference between each point + // and the average of all points. + var sum_of_squares = 0; + for (var j = 0; j < data.length; j++) { + sum_of_squares += Math.pow(average - data[j][1], 2); + } + + // Finally estimate the error: the squared + // difference between the estimate and the actual data + // value at each point. 
+ var err = 0; + for (var k = 0; k < data.length; k++) { + err += Math.pow(data[k][1] - f(data[k][0]), 2); + } + + // As the error grows larger, its ratio to the + // sum of squares increases and the r squared + // value grows lower. + // + // NOTE: if every y value is identical, `sum_of_squares` is 0 and + // this division yields NaN (no error) or -Infinity (some error). + return 1 - (err / sum_of_squares); + } + + + // # [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier) + // + // This is a naïve bayesian classifier that takes + // singly-nested objects. + function bayesian() { + // The `bayes_model` object is what will be exposed + // by this closure, with all of its extended methods, and will + // have access to all scope variables, like `total_count`. + var bayes_model = {}, + // The number of items that are currently + // classified in the model + total_count = 0, + // Per-category counts of every key/value pair seen in training, + // stored as `data[category][key][value]` + data = {}; + + // ## Train + // Train the classifier with a new item, which has a single + // dimension of Javascript literal keys and values. + bayes_model.train = function(item, category) { + // If the data object doesn't have any values + // for this category, create a new object for it. + if (!data[category]) data[category] = {}; + + // Iterate through each key in the item. + for (var k in item) { + var v = item[k]; + // Initialize the nested object `data[category][k][item[k]]` + // with an object of keys that equal 0. + if (data[category][k] === undefined) data[category][k] = {}; + if (data[category][k][v] === undefined) data[category][k][v] = 0; + + // And increment the key for this key/value combination. + data[category][k][item[k]]++; + } + // Increment the number of items classified + total_count++; + }; + + // ## Score + // Generate a score of how well this item matches all + // possible categories based on its attributes + bayes_model.score = function(item) { + // Initialize an empty object of odds per category. 
+ var odds = {}, category; + // Iterate through each key in the item, + // then iterate through each category that has been used + // in previous calls to `.train()` + for (var k in item) { + var v = item[k]; + for (category in data) { + // Create an empty object for storing key - value combinations + // for this category. + if (odds[category] === undefined) odds[category] = {}; + + // If this category never saw this property in training, it counts + // for nothing, but if it does have the property that we're looking + // for from the item to categorize, it counts based on how popular + // that value is versus the whole population (`total_count`). + if (data[category][k]) { + odds[category][k + '_' + v] = (data[category][k][v] || 0) / total_count; + } else { + odds[category][k + '_' + v] = 0; + } + } + } + + // Set up a new object that will contain sums of these odds by category + var odds_sums = {}; + + for (category in odds) { + // Tally all of the odds for each category-combination pair - + // the non-existence of a category does not add anything to the + // score. + for (var combination in odds[category]) { + if (odds_sums[category] === undefined) odds_sums[category] = 0; + odds_sums[category] += odds[category][combination]; + } + } + + return odds_sums; + }; + + // Return the completed model. + return bayes_model; + } + + // # sum + // + // is simply the result of adding all numbers + // together, starting from zero, so the sum of an empty array is 0. + // + // This runs on `O(n)`, linear time with respect to the length of the array + function sum(x) { + var value = 0; + for (var i = 0; i < x.length; i++) { + value += x[i]; + } + return value; + } + + // # mean + // + // is the sum over the number of values + // + // This runs on `O(n)`, linear time with respect to the length of the array + function mean(x) { + // The mean of no numbers is null + if (x.length === 0) return null; + + return sum(x) / x.length; + } + + // # geometric mean + // + // a mean function that is more useful for numbers in different + // ranges. 
//
// this is the nth root of the input numbers multiplied by each other
//
// This runs on `O(n)`, linear time in respect to the array
//
// Returns `null` for an empty list or when any value is zero or negative.
function geometric_mean(x) {
    // The mean of no numbers is null
    if (x.length === 0) return null;

    // the starting value.
    var product = 1;

    for (var i = 0; i < x.length; i++) {
        // the geometric mean is only valid for positive numbers
        if (x[i] <= 0) return null;

        // repeatedly multiply the value by each number
        product *= x[i];
    }

    // As long as the running product stayed inside the range a double
    // can represent, its nth root is the answer.
    if (isFinite(product) && product !== 0) {
        return Math.pow(product, 1 / x.length);
    }

    // Otherwise the product overflowed to Infinity or underflowed to 0
    // (every input is > 0 here, so an exact 0 can only be underflow), and
    // taking its root would give a meaningless result. Fall back to the
    // mathematically equivalent exp(mean(log(x))), which never forms the
    // huge intermediate value. NaN or Infinity inputs still propagate to
    // the result through this path.
    var log_sum = 0;
    for (var j = 0; j < x.length; j++) {
        log_sum += Math.log(x[j]);
    }

    return Math.exp(log_sum / x.length);
}


// # harmonic mean
//
// a mean function typically used to find the average of rates
//
// this is the reciprocal of the arithmetic mean of the reciprocals
// of the input numbers
//
// This runs on `O(n)`, linear time in respect to the array
//
// Returns `null` for an empty list or when any value is zero or negative.
function harmonic_mean(x) {
    // The mean of no numbers is null
    if (x.length === 0) return null;

    var reciprocal_sum = 0;

    for (var i = 0; i < x.length; i++) {
        // the harmonic mean is only valid for positive numbers
        if (x[i] <= 0) return null;

        reciprocal_sum += 1 / x[i];
    }

    // divide n by the reciprocal sum
    return x.length / reciprocal_sum;
}

// # root mean square (RMS)
//
// a mean function used as a measure of the magnitude of a set
// of numbers, regardless of their sign
//
// this is the square root of the mean of the squares of the
// input numbers
//
// This runs on `O(n)`, linear time in respect to the array
function root_mean_square(x) {
    // The root mean square of no numbers is null
    if (x.length === 0) return null;

    var sum_of_squares = 0;
    for (var i = 0; i < x.length; i++) {
        sum_of_squares += Math.pow(x[i], 2);
    }

    return Math.sqrt(sum_of_squares / x.length);
}

// # min
//
// This is simply the minimum number in the set.
//
// This runs on `O(n)`, linear time in respect to the array
function min(x) {
    var smallest;
    for (var idx = 0; idx < x.length; idx++) {
        var candidate = x[idx];
        // `smallest` stays undefined until the first element has been
        // looked at, so that element always becomes the initial minimum
        if (smallest === undefined || candidate < smallest) {
            smallest = candidate;
        }
    }
    // an empty list never assigns `smallest`, so this is undefined then
    return smallest;
}

// # max
//
// This is simply the maximum number in the set.
//
// This runs on `O(n)`, linear time in respect to the array
function max(x) {
    var largest;
    for (var idx = 0; idx < x.length; idx++) {
        var candidate = x[idx];
        // `largest` stays undefined until the first element has been
        // looked at, so that element always becomes the initial maximum
        if (largest === undefined || candidate > largest) {
            largest = candidate;
        }
    }
    // an empty list never assigns `largest`, so this is undefined then
    return largest;
}

// # [variance](http://en.wikipedia.org/wiki/Variance)
//
// is the mean of the squared deviations from the mean
//
// depends on `mean()`
function variance(x) {
    // There is no variance to report for an empty list
    if (x.length === 0) return null;

    var center = mean(x),
        squared_distances = [],
        idx = 0;

    // Collect how far each value sits from the mean, squared.
    while (idx < x.length) {
        squared_distances.push(Math.pow(x[idx] - center, 2));
        idx++;
    }

    // The variance is the average of those squared distances
    return mean(squared_distances);
}

// # [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
//
// is just the square root of the variance.
//
// depends on `variance()`
function standard_deviation(x) {
    // An empty list has no standard deviation; answer null directly
    // instead of taking the square root of a null variance.
    return x.length === 0 ? null : Math.sqrt(variance(x));
}

// The sum of deviations to the Nth power.
// When n=2 it's the sum of squared deviations.
// When n=3 it's the sum of cubed deviations.
//
// depends on `mean()`
function sum_nth_power_deviations(x, n) {
    var mean_value = mean(x),
        // named `total` rather than `sum` so that the file-level `sum()`
        // helper is not shadowed inside this function
        total = 0;

    for (var i = 0; i < x.length; i++) {
        total += Math.pow(x[i] - mean_value, n);
    }

    return total;
}

// # [sample variance](http://en.wikipedia.org/wiki/Variance)
//
// is the sum of squared deviations from the mean, divided by `n - 1`
// rather than `n` (Bessel's correction)
//
// depends on `sum_nth_power_deviations`
function sample_variance(x) {
    // The sample variance needs at least two numbers; otherwise it is null
    if (x.length <= 1) return null;

    var sum_squared_deviations_value = sum_nth_power_deviations(x, 2);

    // Divide by one less than the number of values
    return sum_squared_deviations_value / (x.length - 1);
}

// # [sample standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
//
// is just the square root of the sample variance.
//
// depends on `sample_variance()`
function sample_standard_deviation(x) {
    // The sample standard deviation of fewer than two numbers is null.
    // This guard is required: Math.sqrt(null) would evaluate to 0.
    if (x.length <= 1) return null;

    return Math.sqrt(sample_variance(x));
}

// # [covariance](http://en.wikipedia.org/wiki/Covariance)
//
// sample covariance of two datasets:
// how much do the two datasets move together?
// x and y are two datasets, represented as arrays of numbers.
//
// depends on `mean()`
function sample_covariance(x, y) {

    // The two datasets must have the same length which must be more than 1
    if (x.length <= 1 || x.length !== y.length) {
        return null;
    }

    // determine the mean of each dataset so that we can judge each
    // value of the dataset fairly as the difference from the mean. this
    // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance
    // does not suffer because of the difference in absolute values
    var xmean = mean(x),
        ymean = mean(y),
        // `total`, not `sum`, to avoid shadowing the file-level `sum()`
        total = 0;

    // for each pair of values, the covariance increases when their
    // difference from the mean is associated - if both are well above
    // or if both are well below
    // the mean, the covariance increases significantly.
    for (var i = 0; i < x.length; i++) {
        total += (x[i] - xmean) * (y[i] - ymean);
    }

    // the covariance is weighted by the length of the datasets
    // (one less than the length, as this is the sample covariance).
    return total / (x.length - 1);
}

// # [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence)
//
// Gets a measure of how correlated two datasets are, between -1 and 1
//
// Returns `null` when the covariance or either standard deviation is
// `null` (fewer than two values, or datasets of different lengths).
//
// depends on `sample_standard_deviation()` and `sample_covariance()`
function sample_correlation(x, y) {
    var cov = sample_covariance(x, y),
        xstd = sample_standard_deviation(x),
        ystd = sample_standard_deviation(y);

    if (cov === null || xstd === null || ystd === null) {
        return null;
    }

    return cov / xstd / ystd;
}

// # [median](http://en.wikipedia.org/wiki/Median)
//
// The middle number of a list. This is often a good indicator of 'the middle'
// when there are outliers that skew the `mean()` value.
function median(x) {
    // The median of an empty list is null
    if (x.length === 0) return null;

    // Sorting the array makes it easy to find the center, but
    // use `.slice()` to ensure the original array `x` is not modified
    var sorted = x.slice().sort(function (a, b) { return a - b; });

    // If the length of the list is odd, it's the central number
    if (sorted.length % 2 === 1) {
        return sorted[(sorted.length - 1) / 2];
    // Otherwise, the median is the average of the two numbers
    // at the center of the list
    } else {
        var a = sorted[(sorted.length / 2) - 1];
        var b = sorted[(sorted.length / 2)];
        return (a + b) / 2;
    }
}

// # [mode](http://bit.ly/W5K4Yt)
//
// The mode is the number that appears in a list the highest number of times.
// There can be multiple modes in a list: in the event of a tie, this
// algorithm will return the smallest of the tied values, because it walks
// a sorted copy and only replaces the mode on a strictly higher count.
//
// This implementation is inspired by [science.js](https://github.com/jasondavies/science.js/blob/master/src/stats/mode.js)
//
// The input is sorted first, so the cost is dominated by the sort
// (`O(n log n)`); the scan that follows is a single linear pass.
function mode(x) {

    // Trivial inputs: nothing to count, or only one candidate
    if (x.length === 0) return null;
    if (x.length === 1) return x[0];

    // In a sorted copy, equal values sit next to each other, so each
    // distinct value forms exactly one contiguous run.
    var ordered = x.slice().sort(function (a, b) { return a - b; });

    // the run currently being counted
    var run_value = ordered[0],
        run_length = 1,
        // the longest run found so far
        best_value,
        best_length = 0;

    // The loop deliberately visits one index past the end: that extra
    // `undefined` element closes the final run so it can be compared
    // against the best run like every other one.
    for (var idx = 1; idx <= ordered.length; idx++) {
        if (ordered[idx] === run_value) {
            // same value again: the run grows
            run_length++;
            continue;
        }

        // a run just ended; it wins only if strictly longer, so the
        // earliest (smallest) value is kept on ties
        if (run_length > best_length) {
            best_length = run_length;
            best_value = run_value;
        }
        run_value = ordered[idx];
        run_length = 1;
    }

    return best_value;
}

// # [t-test](http://en.wikipedia.org/wiki/Student's_t-test)
//
// This is to compute a one-sample t-test, comparing the mean
// of a sample to a known value, x.
//
// in this case, we're trying to determine whether the
// population mean is equal to the value that we know, which is `x`
// here.
// usually the results here are used to look up a
// [p-value](http://en.wikipedia.org/wiki/P-value), which, for
// a certain level of significance, will let you determine that the
// null hypothesis can or cannot be rejected.
//
// Returns `null` for an empty sample, for which no statistic exists.
//
// Depends on `standard_deviation()` and `mean()`
function t_test(sample, x) {
    // Without observations there is neither a mean nor a deviation,
    // and the arithmetic below would only produce `NaN`.
    if (sample.length === 0) return null;

    // The mean of the sample
    var sample_mean = mean(sample);

    // The standard deviation of the sample
    var sd = standard_deviation(sample);

    // Square root the length of the sample
    var rootN = Math.sqrt(sample.length);

    // Compute the known value against the sample,
    // returning the t value
    return (sample_mean - x) / (sd / rootN);
}

// # [2-sample t-test](http://en.wikipedia.org/wiki/Student's_t-test)
//
// This is to compute two sample t-test.
// Tests whether "mean(X)-mean(Y) = difference", (
// in the most common case, we often have `difference == 0` to test if two samples
// are likely to be taken from populations with the same mean value) with
// no prior knowledge on standard deviations of both samples
// other than the fact that they have the same standard deviation.
//
// Usually the results here are used to look up a
// [p-value](http://en.wikipedia.org/wiki/P-value), which, for
// a certain level of significance, will let you determine that the
// null hypothesis can or cannot be rejected.
//
// `diff` can be omitted if it equals 0.
//
// [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
// a null hypothesis that the two populations that have been sampled into
// `sample_x` and `sample_y` are equal to each other.
//
// Returns `null` when either sample is empty or when the two samples
// together hold fewer than three values.
//
// Depends on `sample_variance()` and `mean()`
function t_test_two_sample(sample_x, sample_y, difference) {
    var n = sample_x.length,
        m = sample_y.length;

    // If either sample doesn't actually have any values, we can't
    // compute this at all, so we return `null`.
    if (!n || !m) return null;

    // The pooled variance divides by `n + m - 2`. With one value in each
    // sample that is 0 / 0, so there is no statistic to report.
    if (n + m < 3) return null;

    // default difference (mu) is zero
    if (!difference) difference = 0;

    var meanX = mean(sample_x),
        meanY = mean(sample_y);

    // variance pooled across both samples, weighted by their
    // degrees of freedom
    var weightedVariance = ((n - 1) * sample_variance(sample_x) +
        (m - 1) * sample_variance(sample_y)) / (n + m - 2);

    return (meanX - meanY - difference) /
        Math.sqrt(weightedVariance * (1 / n + 1 / m));
}

// # chunk
//
// Split an array into chunks of a specified size. This function
// has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
// function, and thus will insert smaller-sized chunks at the end if
// the input size is not divisible by the chunk size.
//
// `sample` is expected to be an array, and `chunkSize` a number.
// The `sample` array can contain any kind of data.
function chunk(sample, chunkSize) {

    // a list of result chunks, as arrays in an array
    var output = [];

    // `chunkSize` must be greater than zero - otherwise the loop below,
    // in which we call `start += chunkSize`, can never advance.
    // `null` signals invalid input; the negated comparison also
    // rejects `NaN` and a missing argument.
    if (!(chunkSize > 0)) {
        return null;
    }

    // `start` is the index at which `.slice` will start selecting
    // new array elements
    for (var start = 0; start < sample.length; start += chunkSize) {

        // for each chunk, slice that part of the array and add it
        // to the output. The `.slice` function does not change
        // the original array.
        output.push(sample.slice(start, start + chunkSize));
    }
    return output;
}

// # shuffle_in_place
//
// A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
// in-place - which means that it will change the order of the original
// array by reference.
// Reorders `sample` itself and returns that same array.
// `randomSource` is an optional function returning numbers in [0, 1).
function shuffle_in_place(sample, randomSource) {

    // a custom random number source can be provided if you want to use
    // a fixed seed or another random number generator, like
    // [random-js](https://www.npmjs.org/package/random-js)
    randomSource = randomSource || Math.random;

    // store the current length of the sample to determine
    // when no elements remain to shuffle.
    var length = sample.length;

    // temporary is used to hold an item when it is being
    // swapped between indices.
    var temporary;

    // The index to swap at each stage.
    var index;

    // While there are still items to shuffle
    while (length > 0) {
        // choose a random index within the subset of the array
        // that is not yet shuffled; `length` is decremented after
        // being used, so it then points at the last unshuffled slot
        index = Math.floor(randomSource() * length--);

        // store the value that we'll move temporarily
        temporary = sample[length];

        // swap the value at `sample[length]` with `sample[index]`
        sample[length] = sample[index];
        sample[index] = temporary;
    }

    return sample;
}

// # shuffle
//
// A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
// is a fast way to create a random permutation of a finite set.
//
// Returns a new, shuffled array; the input is left unmodified.
function shuffle(sample, randomSource) {
    // A single shallow copy is all that is needed to protect the
    // caller's array; that copy is then shuffled in place.
    return shuffle_in_place(sample.slice(), randomSource);
}

// # sample
//
// Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
// from a given array of `n` elements.
function sample(array, n, randomSource) {
    // shuffle a copy of the original array using a fisher-yates shuffle
    var shuffled = shuffle(array, randomSource);

    // and then return a subset of it - the first `n` elements.
    return shuffled.slice(0, n);
}

// # quantile
//
// This is a population quantile, since we assume to know the entire
// dataset in this library.
// Thus I'm trying to follow the
// [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
// algorithm from wikipedia.
//
// Sample is a one-dimensional array of numbers,
// and p is either a decimal number from 0 to 1 or an array of decimal
// numbers from 0 to 1.
// In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing
// with decimal values.
// When p is an array, the result of the function is also an array containing the appropriate
// quantiles in input order (an empty array of requests yields an empty array).
function quantile(sample, p) {

    // We can't derive quantiles from an empty list
    if (sample.length === 0) return null;

    // Sort a copy of the array. We'll need a sorted array to index
    // the values in sorted order.
    var sorted = sample.slice().sort(function (a, b) { return a - b; });

    // An empty array has a falsy `.length`, so arrays are detected
    // explicitly; the `.length` test keeps other list-like inputs
    // working as they did before.
    if (Array.isArray(p) || p.length) {
        // Initialize the result array
        var results = [];
        // For each requested quantile
        for (var i = 0; i < p.length; i++) {
            results[i] = quantile_sorted(sorted, p[i]);
        }
        return results;
    }

    return quantile_sorted(sorted, p);
}

// # quantile_sorted
//
// This is the internal implementation of quantiles: when you know
// that the order is sorted, you don't need to re-sort it, and the computations
// are much faster.
//
// Returns `null` when `p` lies outside the range 0 to 1.
function quantile_sorted(sample, p) {
    // the position of the requested quantile within the sorted list
    var idx = (sample.length) * p;
    if (p < 0 || p > 1) {
        return null;
    } else if (p === 1) {
        // If p is 1, directly return the last element
        return sample[sample.length - 1];
    } else if (p === 0) {
        // If p is 0, directly return the first element
        return sample[0];
    } else if (idx % 1 !== 0) {
        // If the position is not an integer, round it up and return
        // the element at that (one-based) position
        return sample[Math.ceil(idx) - 1];
    } else if (sample.length % 2 === 0) {
        // If the list has even-length, we'll take the average of this number
        // and the next value, if there is one
        return (sample[idx - 1] + sample[idx]) / 2;
    } else {
        // Finally, in the simple case of an integer value
        // with an odd-length list, return the sample value at the index.
        return sample[idx];
    }
}

// # [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range)
//
// A measure of statistical dispersion, or how scattered, spread, or
// concentrated a distribution is. It's computed as the difference between
// the third quartile and first quartile.
function iqr(sample) {
    // We can't derive quantiles from an empty list
    if (sample.length === 0) return null;

    // Interquartile range is the span between the upper quartile,
    // at `0.75`, and lower quartile, `0.25`. Requesting both at once
    // means the sample is only sorted a single time.
    var quartiles = quantile(sample, [0.25, 0.75]);
    return quartiles[1] - quartiles[0];
}

// # [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation)
//
// The Median Absolute Deviation (MAD) is a robust measure of statistical
// dispersion. It is more resilient to outliers than the standard deviation.
// Returns `null` for a missing or empty input.
//
// depends on `median()`
function mad(x) {
    // There is no deviation to speak of without data
    if (!x || x.length === 0) return null;

    var center = median(x),
        distances = [],
        idx = 0;

    // Record how far each value lies from the median...
    while (idx < x.length) {
        distances.push(Math.abs(x[idx] - center));
        idx++;
    }

    // ...and report the median of those distances
    return median(distances);
}

// ## Compute Matrices for Jenks
//
// Compute the matrices required for Jenks breaks. These matrices
// can be used for any classing of data with `classes <= n_classes`
//
// Both matrices have `data.length + 1` rows and `n_classes + 1` columns;
// the loops below only ever write to indices 1 and up.
function jenksMatrices(data, n_classes) {

    // in the original implementation, these matrices are referred to
    // as `LC` and `OP`
    //
    // * lower_class_limits (LC): optimal lower class limits
    // * variance_combinations (OP): optimal variance combinations for all classes
    var lower_class_limits = [],
        variance_combinations = [],
        // exclusive upper bounds for the row and column loops
        row_bound = data.length + 1,
        class_bound = n_classes + 1,
        // loop counters
        row, col,
        // the variance, as computed at each step in the calculation
        variance = 0;

    // Build both matrices filled with zeroes. Each matrix gets its own
    // row arrays so that writing to one never affects the other.
    for (row = 0; row < row_bound; row++) {
        var limits_row = [],
            variance_row = [];
        for (col = 0; col < class_bound; col++) {
            limits_row.push(0);
            variance_row.push(0);
        }
        lower_class_limits.push(limits_row);
        variance_combinations.push(variance_row);
    }

    // Seed row 1, and mark every other cell as "no solution yet".
    // The original implementation uses 9999999 for this; Javascript
    // has `Infinity`, so that is used instead.
    for (col = 1; col < class_bound; col++) {
        lower_class_limits[1][col] = 1;
        variance_combinations[1][col] = 0;
        for (row = 2; row < row_bound; row++) {
            variance_combinations[row][col] = Infinity;
        }
    }

    for (var prefix = 2; prefix < row_bound; prefix++) {

        // `SZ` originally: the running sum of the values seen so far
        var sum = 0,
            // `ZSQ` originally: the running sum of their squares
            sum_squares = 0,
            // `WT` originally: how many values have been seen so far
            seen = 0;

        // `x * x` is used rather than `Math.pow(x, 2)` below: it is
        // faster in some browsers and avoids an unnecessary concept.
        for (var span = 1; span < prefix + 1; span++) {

            // `III` originally
            var lower_class_limit = prefix - span + 1,
                value = data[lower_class_limit - 1],
                // `IV` originally: row of the sub-problem that ends
                // right before this candidate class
                previous_end = lower_class_limit - 1;

            // here we're estimating variance for each potential classing
            // of the data, for each potential number of classes.
            seen++;

            // increase the current sum and sum-of-squares
            sum += value;
            sum_squares += value * value;

            // sum of squares minus the squared sum over the number
            // of values seen
            variance = sum_squares - (sum * sum) / seen;

            // a class starting at the very first element has no
            // preceding classes to combine with
            if (previous_end === 0) continue;

            for (col = 2; col < class_bound; col++) {
                // cost of starting the last class here, on top of the
                // best classing of everything before it
                var candidate = variance +
                    variance_combinations[previous_end][col - 1];

                // keep the candidate when it is at least as good as
                // what has been recorded for this cell so far
                if (variance_combinations[prefix][col] >= candidate) {
                    lower_class_limits[prefix][col] = lower_class_limit;
                    variance_combinations[prefix][col] = candidate;
                }
            }
        }

        // with a single class everything belongs to it
        lower_class_limits[prefix][1] = 1;
        variance_combinations[prefix][1] = variance;
    }

    // return the two matrices. for just providing breaks, only
    // `lower_class_limits` is needed, but variances can be useful to
    // evaluate goodness of fit.
    return {
        lower_class_limits: lower_class_limits,
        variance_combinations: variance_combinations
    };
}

// ## Pull Breaks Values for Jenks
//
// the second part of the jenks recipe: take the calculated matrices
// and derive an array of n breaks.
// `data` must be sorted ascending and `lower_class_limits` must be the
// matrix computed by `jenksMatrices()` for that same data. The result
// has `n_classes + 1` entries: the minimum, the highest value of each
// class except the last, and the maximum.
function jenksBreaks(data, lower_class_limits, n_classes) {

    // Row `k` of the matrix describes the best classing of the first `k`
    // values, so the walk has to start at the row covering every value.
    // Starting one row earlier would leave the last value out.
    var k = data.length,
        kclass = [],
        countNum = n_classes;

    // the calculation of classes will never include the upper and
    // lower bounds, so we need to explicitly set them
    kclass[n_classes] = data[data.length - 1];
    kclass[0] = data[0];

    // the lower_class_limits matrix is used as indices into itself
    // here: the `k` variable is reused in each iteration.
    while (countNum > 1) {
        // one-based position at which class `countNum` starts
        var class_start = lower_class_limits[k][countNum];

        // the break is the value just below that start (`- 1` for the
        // previous element, `- 1` again to make the index zero-based)
        kclass[countNum - 1] = data[class_start - 2];

        // continue with the values that precede this class
        k = class_start - 1;
        countNum--;
    }

    return kclass;
}

// # [Jenks natural breaks optimization](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization)
//
// Implementations: [1](http://danieljlewis.org/files/2010/06/Jenks.pdf) (python),
// [2](https://github.com/vvoovv/djeo-jenks/blob/master/main.js) (buggy),
// [3](https://github.com/simogeo/geostats/blob/master/lib/geostats.js#L407) (works)
//
// Returns `null` when more classes are requested than there are values.
// The input array is not modified.
//
// Depends on `jenksBreaks()` and `jenksMatrices()`
function jenks(data, n_classes) {

    if (n_classes > data.length) return null;

    // sort a copy of the data in numerical order, since this is
    // expected by the matrices function
    data = data.slice().sort(function (a, b) { return a - b; });

    // get our basic matrices
    var matrices = jenksMatrices(data, n_classes),
        // we only need lower class limits here
        lower_class_limits = matrices.lower_class_limits;

    // extract n_classes out of the computed matrices
    return jenksBreaks(data, lower_class_limits, n_classes);

}

// # [Skewness](http://en.wikipedia.org/wiki/Skewness)
//
// A measure of the extent to which a probability distribution of a
// real-valued random variable "leans" to one side of the mean.
// The skewness value can be positive or negative, or even undefined.
//
// Implementation is based on the adjusted Fisher-Pearson standardized
// moment coefficient, which is the version found in Excel and several
// statistical packages including Minitab, SAS and SPSS.
//
// Depends on `sum_nth_power_deviations()` and `sample_standard_deviation`
function sample_skewness(x) {
    var n = x.length;

    // at least three values are needed: the formula divides by `n - 2`
    if (n < 3) {
        return null;
    }

    // the sample standard deviation, cubed
    var cubed_s = Math.pow(sample_standard_deviation(x), 3);

    // the sum of the cubed deviations from the mean
    var sum_cubed_deviations = sum_nth_power_deviations(x, 3);

    return n * sum_cubed_deviations / ((n - 1) * (n - 2) * cubed_s);
}

// # Standard Normal Table
// A standard normal table, also called the unit normal table or Z table,
// is a mathematical table for the values of Φ (phi), which are the values of
// the cumulative distribution function of the normal distribution.
// It is used to find the probability that a statistic is observed below,
// above, or between values on the standard normal distribution, and by
// extension, any normal distribution.
//
// The probabilities are taken from http://en.wikipedia.org/wiki/Standard_normal_table
// The table used is the cumulative, and not cumulative from 0 to mean
// (even though the latter has 5 digits precision, instead of 4).
// Laid out row by row: entry `i` holds Φ(i / 100), for z from 0.00 to 3.09.
var standard_normal_table = [
    /*  z      0.00    0.01    0.02    0.03    0.04    0.05    0.06    0.07    0.08    0.09 */
    /* 0.0 */
    0.5000, 0.5040, 0.5080, 0.5120, 0.5160, 0.5199, 0.5239, 0.5279, 0.5319, 0.5359,
    /* 0.1 */
    0.5398, 0.5438, 0.5478, 0.5517, 0.5557, 0.5596, 0.5636, 0.5675, 0.5714, 0.5753,
    /* 0.2 */
    0.5793, 0.5832, 0.5871, 0.5910, 0.5948, 0.5987, 0.6026, 0.6064, 0.6103, 0.6141,
    /* 0.3 */
    0.6179, 0.6217, 0.6255, 0.6293, 0.6331, 0.6368, 0.6406, 0.6443, 0.6480, 0.6517,
    /* 0.4 */
    0.6554, 0.6591, 0.6628, 0.6664, 0.6700, 0.6736, 0.6772, 0.6808, 0.6844, 0.6879,
    /* 0.5 */
    0.6915, 0.6950, 0.6985, 0.7019, 0.7054, 0.7088, 0.7123, 0.7157, 0.7190, 0.7224,
    /* 0.6 */
    0.7257, 0.7291, 0.7324, 0.7357, 0.7389, 0.7422, 0.7454, 0.7486, 0.7517, 0.7549,
    /* 0.7 */
    0.7580, 0.7611, 0.7642, 0.7673, 0.7704, 0.7734, 0.7764, 0.7794, 0.7823, 0.7852,
    /* 0.8 */
    0.7881, 0.7910, 0.7939, 0.7967, 0.7995, 0.8023, 0.8051, 0.8078, 0.8106, 0.8133,
    /* 0.9 */
    0.8159, 0.8186, 0.8212, 0.8238, 0.8264, 0.8289, 0.8315, 0.8340, 0.8365, 0.8389,
    /* 1.0 */
    0.8413, 0.8438, 0.8461, 0.8485, 0.8508, 0.8531, 0.8554, 0.8577, 0.8599, 0.8621,
    /* 1.1 */
    0.8643, 0.8665, 0.8686, 0.8708, 0.8729, 0.8749, 0.8770, 0.8790, 0.8810, 0.8830,
    /* 1.2 */
    0.8849, 0.8869, 0.8888, 0.8907, 0.8925, 0.8944, 0.8962, 0.8980, 0.8997, 0.9015,
    /* 1.3 */
    0.9032, 0.9049, 0.9066, 0.9082, 0.9099, 0.9115, 0.9131, 0.9147, 0.9162, 0.9177,
    /* 1.4 */
    0.9192, 0.9207, 0.9222, 0.9236, 0.9251, 0.9265, 0.9279, 0.9292, 0.9306, 0.9319,
    /* 1.5 */
    0.9332, 0.9345, 0.9357, 0.9370, 0.9382, 0.9394, 0.9406, 0.9418, 0.9429, 0.9441,
    /* 1.6 */
    0.9452, 0.9463, 0.9474, 0.9484, 0.9495, 0.9505, 0.9515, 0.9525, 0.9535, 0.9545,
    /* 1.7 */
    0.9554, 0.9564, 0.9573, 0.9582, 0.9591, 0.9599, 0.9608, 0.9616, 0.9625, 0.9633,
    /* 1.8 */
    0.9641, 0.9649, 0.9656, 0.9664, 0.9671, 0.9678, 0.9686, 0.9693, 0.9699, 0.9706,
    /* 1.9 */
    0.9713, 0.9719, 0.9726, 0.9732, 0.9738, 0.9744, 0.9750, 0.9756, 0.9761, 0.9767,
    /* 2.0 */
    0.9772, 0.9778, 0.9783, 0.9788, 0.9793, 0.9798, 0.9803, 0.9808, 0.9812, 0.9817,
    /* 2.1 */
    0.9821, 0.9826, 0.9830, 0.9834, 0.9838, 0.9842, 0.9846, 0.9850, 0.9854, 0.9857,
    /* 2.2 */
    0.9861, 0.9864, 0.9868, 0.9871, 0.9875, 0.9878, 0.9881, 0.9884, 0.9887, 0.9890,
    /* 2.3 */
    0.9893, 0.9896, 0.9898, 0.9901, 0.9904, 0.9906, 0.9909, 0.9911, 0.9913, 0.9916,
    /* 2.4 */
    0.9918, 0.9920, 0.9922, 0.9925, 0.9927, 0.9929, 0.9931, 0.9932, 0.9934, 0.9936,
    /* 2.5 */
    0.9938, 0.9940, 0.9941, 0.9943, 0.9945, 0.9946, 0.9948, 0.9949, 0.9951, 0.9952,
    /* 2.6 */
    0.9953, 0.9955, 0.9956, 0.9957, 0.9959, 0.9960, 0.9961, 0.9962, 0.9963, 0.9964,
    /* 2.7 */
    0.9965, 0.9966, 0.9967, 0.9968, 0.9969, 0.9970, 0.9971, 0.9972, 0.9973, 0.9974,
    /* 2.8 */
    0.9974, 0.9975, 0.9976, 0.9977, 0.9977, 0.9978, 0.9979, 0.9979, 0.9980, 0.9981,
    /* 2.9 */
    0.9981, 0.9982, 0.9982, 0.9983, 0.9984, 0.9984, 0.9985, 0.9985, 0.9986, 0.9986,
    /* 3.0 */
    0.9987, 0.9987, 0.9987, 0.9988, 0.9988, 0.9989, 0.9989, 0.9989, 0.9990, 0.9990
];

// # [Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)
//
// Since probability tables cannot be
// printed for every normal distribution, as there are an infinite variety
// of normal distributions, it is common practice to convert a normal to a
// standard normal and then use the standard normal table to find probabilities
//
// `z` is matched to the nearest entry of `standard_normal_table`; values
// beyond the end of the table use its last entry.
function cumulative_std_normal_probability(z) {

    // The table only covers non-negative values
    var absZ = Math.abs(z),
        // The table holds one entry per hundredth, so the position is
        // simply `absZ * 100`. It is rounded rather than truncated
        // because products such as `0.57 * 100` come out a hair below
        // the integer they stand for and would otherwise land one
        // cell too low.
        index = Math.min(Math.round(absZ * 100), standard_normal_table.length - 1);

    // The index we calculate must be in the table as a positive value,
    // but we still pay attention to whether the input is positive
    // or negative, and flip the output value as a last step.
    if (z >= 0) {
        return standard_normal_table[index];
    } else {
        // due to floating-point arithmetic, values in the table with
        // 4 significant figures can nevertheless end up as repeating
        // fractions when they're computed here.
        return +(1 - standard_normal_table[index]).toFixed(4);
    }
}

// # [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score)
//
// The standard score is the number of standard deviations an observation
// or datum is above or below the mean. Thus, a positive standard score
// represents a datum above the mean, while a negative standard score
// represents a datum below the mean. It is a dimensionless quantity
// obtained by subtracting the population mean from an individual raw
// score and then dividing the difference by the population standard
// deviation.
//
// The z-score is only defined if one knows the population parameters;
// if one only has a sample set, then the analogous computation with
// sample mean and sample standard deviation yields the
// Student's t-statistic.
function z_score(x, mean, standard_deviation) {
    return (x - mean) / standard_deviation;
}

// We use `ε`, epsilon, as a stopping criterion when we want to iterate
// until we're "close enough".
var epsilon = 0.0001;

// # [Factorial](https://en.wikipedia.org/wiki/Factorial)
//
// A factorial, usually written n!, is the product of all positive
// integers less than or equal to n. Often factorial is implemented
// recursively, but this iterative approach is significantly faster
// and simpler.
// Returns `null` for negative input.
function factorial(n) {

    // factorial is mathematically undefined for negative numbers
    if (n < 0) {
        return null;
    }

    // Build the product upwards: 2 * 3 * ... * n. Multiplying by 1
    // changes nothing, so the first factor worth applying is 2.
    var product = 1,
        factor = 2;
    while (factor <= n) {
        product *= factor;
        factor++;
    }
    return product;
}

// # Bernoulli Distribution
//
// The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
// is the discrete probability distribution of a random variable which
// takes value 1 with success probability `p` and value 0 with failure
// probability `q` = 1 - `p`. It can be used, for example, to represent the
// toss of a coin, where "1" is defined to mean "heads" and "0" is defined
// to mean "tails" (or vice versa). It is a special case of a Binomial
// Distribution where `n` = 1.
function bernoulli_distribution(p) {
    // `p` has to be a valid probability (0 ≤ p ≤ 1)
    if (p < 0 || p > 1) {
        return null;
    }

    // a single trial of the binomial distribution
    return binomial_distribution(1, p);
}

// # Binomial Distribution
//
// The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
// distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
// success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
// Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
//
// Returns an object mapping each outcome (number of successes) to its
// probability, or `null` for invalid arguments.
//
// Depends on `factorial()` and `epsilon`
function binomial_distribution(trials, probability) {
    // `probability` has to be a valid probability (0 ≤ p ≤ 1)...
    if (probability < 0 || probability > 1) {
        return null;
    }
    // ...and `trials` a strictly positive integer.
    if (trials <= 0 || trials % 1 !== 0) {
        return null;
    }

    // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function):
    // the chance of exactly `successes` successes
    var probability_mass = function (successes) {
        return factorial(trials) /
            (factorial(successes) * factorial(trials - successes)) *
            (Math.pow(probability, successes) *
                Math.pow(1 - probability, trials - successes));
    };

    // `cells` collects the probability of each outcome and is what gets
    // returned; `cumulative` tracks how much of the distribution has
    // been covered so far.
    var cells = {},
        cumulative = 0,
        outcome = 0;

    // Walk through the outcomes in order, always handling at least one,
    // and stop once the covered probability is within `epsilon` of 1:
    // by then the useful range of the distribution has been calculated.
    while (true) {
        var mass = probability_mass(outcome);
        cells[outcome] = mass;
        cumulative += mass;
        outcome++;
        if (!(cumulative < 1 - epsilon)) break;
    }

    return cells;
}

// # Poisson Distribution
//
// The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
// is a discrete probability distribution that expresses the probability
// of a given number of events occurring in a fixed interval of time
// and/or space if these events occur with a known average rate and
// independently of the time since the last event.
//
// The Poisson Distribution is characterized by the strictly positive
// mean arrival or occurrence rate, `λ`.
//
// Returns an object mapping each outcome (number of events) to its
// probability, or `null` when `lambda` is not strictly positive.
//
// Depends on `factorial()` and `epsilon`
function poisson_distribution(lambda) {
    // lambda has to be strictly positive
    if (lambda <= 0) {
        return null;
    }

    // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function):
    // the chance of exactly `events` events
    var probability_mass = function (events) {
        return (Math.pow(Math.E, -lambda) * Math.pow(lambda, events)) /
            factorial(events);
    };

    // the calculated cells to be returned
    var cells = {},
        // how much of the distribution has been covered so far, in
        // order to know when to stop calculating chances
        cumulative = 0,
        // our current place in the distribution
        outcome = 0;

    // Walk through the outcomes in order, always handling at least one,
    // and stop once the covered probability is within `epsilon` of 1:
    // by then the useful range of the distribution has been calculated.
    while (true) {
        var mass = probability_mass(outcome);
        cells[outcome] = mass;
        cumulative += mass;
        outcome++;
        if (!(cumulative < 1 - epsilon)) break;
    }

    return cells;
}

// # Percentage Points of the χ2 (Chi-Squared) Distribution
// The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
// chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
// criteria of classification of qualitative data, and in confidence interval estimation for a population standard
// deviation of a normal distribution from a sample standard deviation.
//
// Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
// Engineering and Management Science", Wiley (1980).
// Lookup layout: `chi_squared_distribution_table[degrees_of_freedom][p]` is the
// tabulated χ2 value for that many degrees of freedom; within a row the values
// grow as `p` shrinks. Rows exist for 1-30 and then 40, 50, ..., 100 degrees of
// freedom only, and `p` must be one of the eleven listed keys.
var chi_squared_distribution_table = {
    1: { 0.995: 0.00, 0.99: 0.00, 0.975: 0.00, 0.95: 0.00, 0.9: 0.02, 0.5: 0.45, 0.1: 2.71, 0.05: 3.84, 0.025: 5.02, 0.01: 6.63, 0.005: 7.88 },
    2: { 0.995: 0.01, 0.99: 0.02, 0.975: 0.05, 0.95: 0.10, 0.9: 0.21, 0.5: 1.39, 0.1: 4.61, 0.05: 5.99, 0.025: 7.38, 0.01: 9.21, 0.005: 10.60 },
    3: { 0.995: 0.07, 0.99: 0.11, 0.975: 0.22, 0.95: 0.35, 0.9: 0.58, 0.5: 2.37, 0.1: 6.25, 0.05: 7.81, 0.025: 9.35, 0.01: 11.34, 0.005: 12.84 },
    4: { 0.995: 0.21, 0.99: 0.30, 0.975: 0.48, 0.95: 0.71, 0.9: 1.06, 0.5: 3.36, 0.1: 7.78, 0.05: 9.49, 0.025: 11.14, 0.01: 13.28, 0.005: 14.86 },
    5: { 0.995: 0.41, 0.99: 0.55, 0.975: 0.83, 0.95: 1.15, 0.9: 1.61, 0.5: 4.35, 0.1: 9.24, 0.05: 11.07, 0.025: 12.83, 0.01: 15.09, 0.005: 16.75 },
    6: { 0.995: 0.68, 0.99: 0.87, 0.975: 1.24, 0.95: 1.64, 0.9: 2.20, 0.5: 5.35, 0.1: 10.65, 0.05: 12.59, 0.025: 14.45, 0.01: 16.81, 0.005: 18.55 },
    7: { 0.995: 0.99, 0.99: 1.25, 0.975: 1.69, 0.95: 2.17, 0.9: 2.83, 0.5: 6.35, 0.1: 12.02, 0.05: 14.07, 0.025: 16.01, 0.01: 18.48, 0.005: 20.28 },
    8: { 0.995: 1.34, 0.99: 1.65, 0.975: 2.18, 0.95: 2.73, 0.9: 3.49, 0.5: 7.34, 0.1: 13.36, 0.05: 15.51, 0.025: 17.53, 0.01: 20.09, 0.005: 21.96 },
    9: { 0.995: 1.73, 0.99: 2.09, 0.975: 2.70, 0.95: 3.33, 0.9: 4.17, 0.5: 8.34, 0.1: 14.68, 0.05: 16.92, 0.025: 19.02, 0.01: 21.67, 0.005: 23.59 },
    10: { 0.995: 2.16, 0.99: 2.56, 0.975: 3.25, 0.95: 3.94, 0.9: 4.87, 0.5: 9.34, 0.1: 15.99, 0.05: 18.31, 0.025: 20.48, 0.01: 23.21, 0.005: 25.19 },
    11: { 0.995: 2.60, 0.99: 3.05, 0.975: 3.82, 0.95: 4.57, 0.9: 5.58, 0.5: 10.34, 0.1: 17.28, 0.05: 19.68, 0.025: 21.92, 0.01: 24.72, 0.005: 26.76 },
    12: { 0.995: 3.07, 0.99: 3.57, 0.975: 4.40, 0.95: 5.23, 0.9: 6.30, 0.5: 11.34, 0.1: 18.55, 0.05: 21.03, 0.025: 23.34, 0.01: 26.22, 0.005: 28.30 },
    13: { 0.995: 3.57, 0.99: 4.11, 0.975: 5.01, 0.95: 5.89, 0.9: 7.04, 0.5: 12.34, 0.1: 19.81, 0.05: 22.36, 0.025: 24.74, 0.01: 27.69, 0.005: 29.82 },
    14: { 0.995: 4.07, 0.99: 4.66, 0.975: 5.63, 0.95: 6.57, 0.9: 7.79, 0.5: 13.34, 0.1: 21.06, 0.05: 23.68, 0.025: 26.12, 0.01: 29.14, 0.005: 31.32 },
    15: { 0.995: 4.60, 0.99: 5.23, 0.975: 6.27, 0.95: 7.26, 0.9: 8.55, 0.5: 14.34, 0.1: 22.31, 0.05: 25.00, 0.025: 27.49, 0.01: 30.58, 0.005: 32.80 },
    16: { 0.995: 5.14, 0.99: 5.81, 0.975: 6.91, 0.95: 7.96, 0.9: 9.31, 0.5: 15.34, 0.1: 23.54, 0.05: 26.30, 0.025: 28.85, 0.01: 32.00, 0.005: 34.27 },
    17: { 0.995: 5.70, 0.99: 6.41, 0.975: 7.56, 0.95: 8.67, 0.9: 10.09, 0.5: 16.34, 0.1: 24.77, 0.05: 27.59, 0.025: 30.19, 0.01: 33.41, 0.005: 35.72 },
    18: { 0.995: 6.26, 0.99: 7.01, 0.975: 8.23, 0.95: 9.39, 0.9: 10.87, 0.5: 17.34, 0.1: 25.99, 0.05: 28.87, 0.025: 31.53, 0.01: 34.81, 0.005: 37.16 },
    19: { 0.995: 6.84, 0.99: 7.63, 0.975: 8.91, 0.95: 10.12, 0.9: 11.65, 0.5: 18.34, 0.1: 27.20, 0.05: 30.14, 0.025: 32.85, 0.01: 36.19, 0.005: 38.58 },
    20: { 0.995: 7.43, 0.99: 8.26, 0.975: 9.59, 0.95: 10.85, 0.9: 12.44, 0.5: 19.34, 0.1: 28.41, 0.05: 31.41, 0.025: 34.17, 0.01: 37.57, 0.005: 40.00 },
    21: { 0.995: 8.03, 0.99: 8.90, 0.975: 10.28, 0.95: 11.59, 0.9: 13.24, 0.5: 20.34, 0.1: 29.62, 0.05: 32.67, 0.025: 35.48, 0.01: 38.93, 0.005: 41.40 },
    22: { 0.995: 8.64, 0.99: 9.54, 0.975: 10.98, 0.95: 12.34, 0.9: 14.04, 0.5: 21.34, 0.1: 30.81, 0.05: 33.92, 0.025: 36.78, 0.01: 40.29, 0.005: 42.80 },
    23: { 0.995: 9.26, 0.99: 10.20, 0.975: 11.69, 0.95: 13.09, 0.9: 14.85, 0.5: 22.34, 0.1: 32.01, 0.05: 35.17, 0.025: 38.08, 0.01: 41.64, 0.005: 44.18 },
    24: { 0.995: 9.89, 0.99: 10.86, 0.975: 12.40, 0.95: 13.85, 0.9: 15.66, 0.5: 23.34, 0.1: 33.20, 0.05: 36.42, 0.025: 39.36, 0.01: 42.98, 0.005: 45.56 },
    // The 0.1 entry of this row previously read 34.28; the χ2 value for
    // 25 degrees of freedom at p = 0.10 is 34.38.
    25: { 0.995: 10.52, 0.99: 11.52, 0.975: 13.12, 0.95: 14.61, 0.9: 16.47, 0.5: 24.34, 0.1: 34.38, 0.05: 37.65, 0.025: 40.65, 0.01: 44.31, 0.005: 46.93 },
    26: { 0.995: 11.16, 0.99: 12.20, 0.975: 13.84, 0.95: 15.38, 0.9: 17.29, 0.5: 25.34, 0.1: 35.56, 0.05: 38.89, 0.025: 41.92, 0.01: 45.64, 0.005: 48.29 },
    27: { 0.995: 11.81, 0.99: 12.88, 0.975: 14.57, 0.95: 16.15, 0.9: 18.11, 0.5: 26.34, 0.1: 36.74, 0.05: 40.11, 0.025: 43.19, 0.01: 46.96, 0.005: 49.65 },
    28: { 0.995: 12.46, 0.99: 13.57, 0.975: 15.31, 0.95: 16.93, 0.9: 18.94, 0.5: 27.34, 0.1: 37.92, 0.05: 41.34, 0.025: 44.46, 0.01: 48.28, 0.005: 50.99 },
    29: { 0.995: 13.12, 0.99: 14.26, 0.975: 16.05, 0.95: 17.71, 0.9: 19.77, 0.5: 28.34, 0.1: 39.09, 0.05: 42.56, 0.025: 45.72, 0.01: 49.59, 0.005: 52.34 },
    30: { 0.995: 13.79, 0.99: 14.95, 0.975: 16.79, 0.95: 18.49, 0.9: 20.60, 0.5: 29.34, 0.1: 40.26, 0.05: 43.77, 0.025: 46.98, 0.01: 50.89, 0.005: 53.67 },
    40: { 0.995: 20.71, 0.99: 22.16, 0.975: 24.43, 0.95: 26.51, 0.9: 29.05, 0.5: 39.34, 0.1: 51.81, 0.05: 55.76, 0.025: 59.34, 0.01: 63.69, 0.005: 66.77 },
    50: { 0.995: 27.99, 0.99: 29.71, 0.975: 32.36, 0.95: 34.76, 0.9: 37.69, 0.5: 49.33, 0.1: 63.17, 0.05: 67.50, 0.025: 71.42, 0.01: 76.15, 0.005: 79.49 },
    60: { 0.995: 35.53, 0.99: 37.48, 0.975: 40.48, 0.95: 43.19, 0.9: 46.46, 0.5: 59.33, 0.1: 74.40, 0.05: 79.08, 0.025: 83.30, 0.01: 88.38, 0.005: 91.95 },
    70: { 0.995: 43.28, 0.99: 45.44, 0.975: 48.76, 0.95: 51.74, 0.9: 55.33, 0.5: 69.33, 0.1: 85.53, 0.05: 90.53, 0.025: 95.02, 0.01: 100.42, 0.005: 104.22 },
    80: { 0.995: 51.17, 0.99: 53.54, 0.975: 57.15, 0.95: 60.39, 0.9: 64.28, 0.5: 79.33, 0.1: 96.58, 0.05: 101.88, 0.025: 106.63, 0.01: 112.33, 0.005: 116.32 },
    90: { 0.995: 59.20, 0.99: 61.75, 0.975: 65.65, 0.95: 69.13, 0.9: 73.29, 0.5: 89.33, 0.1: 107.57, 0.05: 113.14, 0.025: 118.14, 0.01: 124.12, 0.005: 128.30 },
    100: { 0.995: 67.33, 0.99: 70.06, 0.975: 74.22, 0.95: 77.93, 0.9: 82.36, 0.5: 99.33, 0.1: 118.50, 0.05: 124.34, 0.025: 129.56, 0.01: 135.81, 0.005: 140.17 }
};

// # χ2 (Chi-Squared) Goodness-of-Fit Test
//
// The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
// uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
// (that is, counts of observations), each squared and divided
// by the number of observations expected given the
// hypothesized distribution. The resulting χ2 statistic, `chi_squared`, can be compared to the chi-squared distribution
// to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
// takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
// follows, approximately, a chi-square distribution with (k − c) degrees of freedom where `k` is the number of non-empty
// cells and `c` is the number of estimated parameters for the distribution.
//
// * `data`: array of non-negative integer observations; each value is used as
//   an index into the histogram of observed frequencies.
// * `distribution_type`: function called with the mean of `data`; it must
//   return an object mapping each outcome to its probability
//   (for example `poisson_distribution`).
// * `significance`: one of the probability keys of
//   `chi_squared_distribution_table` (for example `0.05`).
//
// Returns `true` when the computed χ2 statistic is greater than the tabulated
// value for the resulting degrees of freedom, `false` otherwise. A `TypeError`
// is thrown by the final lookup when the table has no row for the resulting
// degrees of freedom.
function chi_squared_goodness_of_fit(data, distribution_type, significance) {
    // Estimate from the sample data, a weighted mean.
    var input_mean = mean(data),
        // Calculated value of the χ2 statistic.
        chi_squared = 0,
        // Degrees of freedom, calculated as (number of class intervals -
        // number of hypothesized distribution parameters estimated - 1)
        degrees_of_freedom,
        // Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
        // Lose one degree of freedom for estimating `lambda` from the sample data.
        c = 1,
        // Generate the hypothesized distribution.
        hypothesized_distribution = distribution_type(input_mean),
        observed_frequencies = [],
        expected_frequencies = [],
        k,
        // index of the neighbouring class that absorbs a sparse class
        absorbing_class;

    // Create an array holding a histogram from the sample data, of
    // the form `{ value: numberOfOccurrences }`
    for (var i = 0; i < data.length; i++) {
        if (observed_frequencies[data[i]] === undefined) {
            observed_frequencies[data[i]] = 0;
        }
        observed_frequencies[data[i]]++;
    }

    // The histogram we created might be sparse - there might be gaps
    // between values. So we iterate through the histogram, making
    // sure that instead of undefined, gaps have 0 values.
    for (i = 0; i < observed_frequencies.length; i++) {
        if (observed_frequencies[i] === undefined) {
            observed_frequencies[i] = 0;
        }
    }

    // Create an array holding a histogram of expected data given the
    // sample size and hypothesized distribution.
    for (k in hypothesized_distribution) {
        if (k in observed_frequencies) {
            expected_frequencies[k] = hypothesized_distribution[k] * data.length;
        }
    }

    // Working backward through the expected frequencies, collapse classes
    // if less than three observations are expected for a class.
    // This transformation is applied to the observed frequencies as well.
    //
    // A sparse class is merged into the class before it; the first class has
    // no predecessor, so it is merged into the class after it instead. The
    // merged class itself is then removed with `splice`, so the class that is
    // dropped is always the one that was merged, not merely the last one.
    for (k = expected_frequencies.length - 1; k >= 0; k--) {
        if (expected_frequencies[k] < 3) {
            absorbing_class = k > 0 ? k - 1 : 1;
            // Only a single class is left: nothing to merge it into.
            if (absorbing_class >= expected_frequencies.length) {
                break;
            }

            expected_frequencies[absorbing_class] += expected_frequencies[k];
            expected_frequencies.splice(k, 1);

            observed_frequencies[absorbing_class] += observed_frequencies[k];
            observed_frequencies.splice(k, 1);
        }
    }

    // Iterate through the squared differences between observed & expected
    // frequencies, accumulating the `chi_squared` statistic.
    for (k = 0; k < observed_frequencies.length; k++) {
        chi_squared += Math.pow(
            observed_frequencies[k] - expected_frequencies[k], 2) /
            expected_frequencies[k];
    }

    // Calculate degrees of freedom for this test and look it up in the
    // `chi_squared_distribution_table` in order to
    // accept or reject the goodness-of-fit of the hypothesized distribution.
    degrees_of_freedom = observed_frequencies.length - c - 1;
    return chi_squared_distribution_table[degrees_of_freedom][significance] < chi_squared;
}

// # Mixin
//
// Mixin simple_statistics to a single Array instance if provided
// or the Array native object if not. This is an optional
// feature that lets you treat simple_statistics as a native feature
// of Javascript.
// Attach a fixed set of simple_statistics functions as non-enumerable methods.
//
// With an `array` argument, a shallow copy of that array is extended and
// returned, leaving the caller's array untouched. Without one, the methods
// are defined on `Array.prototype`, which is then returned.
// Throws an `Error` when `Object.defineProperty` / `Object.defineProperties`
// are not available.
function mixin(array) {
    if (!(Object.defineProperty && Object.defineProperties)) {
        throw new Error('without defineProperty, simple-statistics cannot be mixed in');
    }

    // Only functions that operate on a plain array in a single step
    // can be exposed as methods.
    var mixable = ['median', 'standard_deviation', 'sum',
        'sample_skewness',
        'mean', 'min', 'max', 'quantile', 'geometric_mean',
        'harmonic_mean', 'root_mean_square'];

    // Build the method for one function name. Taking the name as a
    // parameter gives every generated method its own copy of it.
    function as_method(name) {
        return function() {
            // the receiver array goes first, followed by whatever
            // arguments the method was called with
            var call_args = [this].concat(Array.prototype.slice.call(arguments));
            return ss[name].apply(ss, call_args);
        };
    }

    // Extend a copy of the supplied array, or the shared prototype.
    var target = array ? array.slice() : Array.prototype;

    // [defineProperty](https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/Object/defineProperty)
    // lets the new methods be non-enumerable, so `for (var in x)` loops
    // never see them.
    mixable.forEach(function(name) {
        Object.defineProperty(target, name, {
            value: as_method(name),
            configurable: true,
            enumerable: false,
            writable: true
        });
    });

    return target;
}

ss.linear_regression = linear_regression;
ss.standard_deviation = standard_deviation;
ss.r_squared = r_squared;
ss.median = median;
ss.mean = mean;
ss.mode = mode;
ss.min = min;
ss.max = max;
ss.sum = sum;
ss.quantile = quantile;
ss.quantile_sorted = quantile_sorted;
ss.iqr = iqr;
ss.mad = mad;

ss.chunk = chunk;
ss.shuffle = shuffle;
ss.shuffle_in_place = shuffle_in_place;

ss.sample = sample;

ss.sample_covariance = sample_covariance;
ss.sample_correlation = sample_correlation;
ss.sample_variance = sample_variance;
ss.sample_standard_deviation = sample_standard_deviation;
ss.sample_skewness = sample_skewness;

ss.geometric_mean = geometric_mean;
ss.harmonic_mean = harmonic_mean;
ss.root_mean_square = root_mean_square;
ss.variance = variance;
ss.t_test = t_test;
ss.t_test_two_sample = t_test_two_sample;

// jenks
ss.jenksMatrices = jenksMatrices;
ss.jenksBreaks = jenksBreaks;
ss.jenks = jenks;

ss.bayesian = bayesian;

// Distribution-related methods
ss.epsilon = epsilon; // We make ε available to the test suite.
+ ss.factorial = factorial; + ss.bernoulli_distribution = bernoulli_distribution; + ss.binomial_distribution = binomial_distribution; + ss.poisson_distribution = poisson_distribution; + ss.chi_squared_goodness_of_fit = chi_squared_goodness_of_fit; + + // Normal distribution + ss.z_score = z_score; + ss.cumulative_std_normal_probability = cumulative_std_normal_probability; + ss.standard_normal_table = standard_normal_table; + + // Alias this into its common name + ss.average = mean; + ss.interquartile_range = iqr; + ss.mixin = mixin; + ss.median_absolute_deviation = mad; + ss.rms = root_mean_square; + +})(this); diff --git a/perf/simple-statistics/test/bayes.test.js b/perf/simple-statistics/test/bayes.test.js new file mode 100644 index 0000000..6667f29 --- /dev/null +++ b/perf/simple-statistics/test/bayes.test.js @@ -0,0 +1,114 @@ +var ss = require('../'); +var test = require('tape'); + +test('bayes', function(t) { + test('makes an easy call with one training round', function(t) { + var bayes = ss.bayesian(); + bayes.train({ + species: 'Cat' + }, 'animal'); + t.deepEqual(bayes.score({ + species: 'Cat' + }), { + animal: 1 + }); + t.end(); + }); + + test('makes fify-fifty call', function(t) { + var bayes = ss.bayesian(); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'chair'); + t.deepEqual(bayes.score({ + species: 'Cat' + }), { + animal: 0.5, + chair: 0.5 + }); + t.end(); + }); + + test('makes seventy-five/twenty-five call', function(t) { + var bayes = ss.bayesian(); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'chair'); + t.deepEqual(bayes.score({ + species: 'Cat' + }), { + animal: 0.75, + chair: 0.25 + }); + t.end(); + }); + + test('tests multiple properties', function(t) { + var bayes = ss.bayesian(); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 
'animal'); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'chair'); + bayes.train({ + species: 'Cat', + color: 'white' + }, 'chair'); + t.deepEqual(bayes.score({ + color: 'white' + }), { + animal: 0, + chair: 0.2 + }); + t.end(); + }); + + test('classifies multiple things', function(t) { + var bayes = ss.bayesian(); + bayes.train({ + species: 'Cat' + }, 'animal'); + bayes.train({ + species: 'Dog' + }, 'animal'); + bayes.train({ + species: 'Dog' + }, 'animal'); + bayes.train({ + species: 'Cat' + }, 'chair'); + t.deepEqual(bayes.score({ + species: 'Cat' + }), { + animal: 0.25, + chair: 0.25 + }); + t.deepEqual(bayes.score({ + species: 'Dog' + }), { + animal: 0.5, + chair: 0 + }); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/bernoulli_distribution.test.js b/perf/simple-statistics/test/bernoulli_distribution.test.js new file mode 100644 index 0000000..b64eb0d --- /dev/null +++ b/perf/simple-statistics/test/bernoulli_distribution.test.js @@ -0,0 +1,17 @@ +var test = require('tape'); +var ss = require('../'); + +test('bernoulli_distribution', function(t) { + test('can return generate probability and cumulative probability distributions for p = 0.3', function(t) { + t.equal('object', typeof ss.bernoulli_distribution(0.3)); + t.equal(ss.bernoulli_distribution(0.3)[0], 0.7, ss.epsilon); + t.equal(ss.bernoulli_distribution(0.3)[1], 0.3, ss.epsilon); + t.end(); + }); + test('can return null when p is not a valid probability', function(t) { + t.equal(null, ss.bernoulli_distribution(-0.01), 'p should be greater than 0.0'); + t.equal(null, ss.bernoulli_distribution(1.5), 'p should be less than 1.0'); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/binomial_distribution.test.js b/perf/simple-statistics/test/binomial_distribution.test.js new file mode 100644 index 0000000..5bd45ea --- /dev/null +++ b/perf/simple-statistics/test/binomial_distribution.test.js @@ -0,0 +1,31 @@ +var test = 
require('tape'); +var ss = require('../'); + +function rnd(n) { + return parseFloat(n.toFixed(4)); +} + +test('binomial_distribution', function(t) { + // Data given in the [Wikipedia example](http://en.wikipedia.org/wiki/Binomial_distribution#Example) retrieved 29 Mar 2014 + // Cumulative probabilities worked by hand to mitigate accumulated rounding errors. + test('can return generate probability and cumulative probability distributions for n = 6, p = 0.3', function(t) { + t.equal('object', typeof ss.binomial_distribution(6, 0.3)); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[0]), 0.1176, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[1]), 0.3025, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[2]), 0.3241, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[3]), 0.1852, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[4]), 0.0595, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[5]), 0.0102, ss.epsilon); + t.equal(rnd(ss.binomial_distribution(6, 0.3)[6]), 0.0007, ss.epsilon); + t.end(); + }); + + test('can return null when p or n are not valid parameters', function(t) { + t.equal(null, ss.binomial_distribution(0, 0.5), 'n should be strictly positive'); + t.equal(null, ss.binomial_distribution(1.5, 0.5), 'n should be an integer'); + t.equal(null, ss.binomial_distribution(2, -0.01), 'p should be greater than 0.0'); + t.equal(null, ss.binomial_distribution(2, 1.5), 'p should be less than 1.0'); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/chi_squared_goodness_of_fit.test.js b/perf/simple-statistics/test/chi_squared_goodness_of_fit.test.js new file mode 100644 index 0000000..84a0cb8 --- /dev/null +++ b/perf/simple-statistics/test/chi_squared_goodness_of_fit.test.js @@ -0,0 +1,23 @@ +var test = require('tape'); +var ss = require('../'); + +// Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. 
Montgomery, +// "Probability and Statistics in Engineering and Management Science", Wiley (1980). +var data_10_19 = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3 +]; + +test('chi_squared_goodness_of_fit', function(t) { + test('can reject the null hypothesis with level of confidence specified at 0.05', function(t) { + t.equal(false, ss.chi_squared_goodness_of_fit(data_10_19, ss.poisson_distribution, 0.05)); + t.end(); + }); + test('can accept the null hypothesis with level of confidence specified at 0.10', function(t) { + t.equal(true, ss.chi_squared_goodness_of_fit(data_10_19, ss.poisson_distribution, 0.10)); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/chunks.test.js b/perf/simple-statistics/test/chunks.test.js new file mode 100644 index 0000000..73d7fa1 --- /dev/null +++ b/perf/simple-statistics/test/chunks.test.js @@ -0,0 +1,18 @@ +var test = require('tape'); +var ss = require('../'); + +test('chunks', function(t) { + test('can get chunks of an array', function(t) { + t.deepEqual(ss.chunk([1, 2], 1), [[1], [2]]); + t.deepEqual(ss.chunk([1, 2], 2), [[1, 2]]); + t.deepEqual(ss.chunk([1, 2, 3, 4], 4), [[1, 2, 3, 4]]); + t.deepEqual(ss.chunk([1, 2, 3, 4], 2), [[1, 2], [3, 4]]); + t.deepEqual(ss.chunk([1, 2, 3, 4], 3), [[1, 2, 3], [4]]); + t.deepEqual(ss.chunk([1, 2, 3, 4, 5, 6, 7], 2), [[1, 2], [3, 4], [5, 6], [7]]); + t.deepEqual(ss.chunk([], 2), []); + t.deepEqual(ss.chunk([], 0), null); + t.deepEqual(ss.chunk([1, 2], 0), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/cumulative.js b/perf/simple-statistics/test/cumulative.js new file mode 100644 index 0000000..0a3317f --- /dev/null +++ b/perf/simple-statistics/test/cumulative.js @@ -0,0 +1,13 @@ +var test = require('tape'); +var ss = require('../'); + +test('cumulative_std_normal_probability', function(t) { + // 
https://en.wikipedia.org/wiki/Standard_normal_table#Examples_of_use + test('wikipedia test example works', function(t) { + for (var i = 0; i < ss.standard_normal_table.length; i++) { + t.equal(ss.cumulative_std_normal_probability(0.4), 0.6554); + } + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/factorial.test.js b/perf/simple-statistics/test/factorial.test.js new file mode 100644 index 0000000..90d56e4 --- /dev/null +++ b/perf/simple-statistics/test/factorial.test.js @@ -0,0 +1,22 @@ +var test = require('tape'); +var ss = require('../'); + +test('factorial', function(t) { + test('can return null given a negative number', function(t) { + t.equal(null, ss.factorial(-1)); + t.end(); + }); + test('can calculate 0! = 1', function(t) { + t.equal(ss.factorial(0), 1); + t.end(); + }); + test('can calculate 1! = 1', function(t) { + t.equal(ss.factorial(1), 1); + t.end(); + }); + test('can calculate 100! = 1', function(t) { + t.equal(ss.factorial(100), 9.33262154439441e+157); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/geometric_mean.test.js b/perf/simple-statistics/test/geometric_mean.test.js new file mode 100644 index 0000000..d89a5ca --- /dev/null +++ b/perf/simple-statistics/test/geometric_mean.test.js @@ -0,0 +1,23 @@ +var test = require('tape'); +var ss = require('../'); + +test('geometric mean', function(t) { + // From http://en.wikipedia.org/wiki/Geometric_mean + test('can get the mean of two numbers', function(t) { + t.equal(ss.geometric_mean([2, 8]), 4); + t.equal(ss.geometric_mean([4, 1, 1 / 32]), 0.5); + t.equal(Math.round(ss.geometric_mean([2, 32, 1])), 4); + t.end(); + }); + + test('returns null for empty lists', function(t) { + t.equal(ss.geometric_mean([]), null); + t.end(); + }); + + test('returns null for lists with negative numbers', function(t) { + t.equal(ss.geometric_mean([-1]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/harmonic_mean.test.js 
b/perf/simple-statistics/test/harmonic_mean.test.js new file mode 100644 index 0000000..711e970 --- /dev/null +++ b/perf/simple-statistics/test/harmonic_mean.test.js @@ -0,0 +1,27 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('harmonic_mean', function(t) { + // From http://en.wikipedia.org/wiki/Harmonic_mean + test('can get the mean of two or more numbers', function(t) { + t.equal(ss.harmonic_mean([1, 1]), 1); + t.equal(rnd(ss.harmonic_mean([2, 3])), 2.4); + t.equal(ss.harmonic_mean([1, 2, 4]), 12 / 7); + t.end(); + }); + + test('returns null for empty lists', function(t) { + t.equal(ss.harmonic_mean([]), null); + t.end(); + }); + + test('returns null for lists with negative numbers', function(t) { + t.equal(ss.harmonic_mean([-1]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/iqr.test.js b/perf/simple-statistics/test/iqr.test.js new file mode 100644 index 0000000..1367701 --- /dev/null +++ b/perf/simple-statistics/test/iqr.test.js @@ -0,0 +1,24 @@ +var test = require('tape'); +var ss = require('../'); + +test('interquartile range (iqr)', function(t) { + // Data and results from + // [Wikipedia](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population) + test('can get proper iqr of an even-length list', function(t) { + var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20]; + t.equal(ss.quantile(even, 0.75) - ss.quantile(even, 0.25), ss.iqr(even)); + t.end(); + }); + + test('can get proper iqr of an odd-length list', function(t) { + var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20]; + t.equal(ss.quantile(odd, 0.75) - ss.quantile(odd, 0.25), ss.iqr(odd)); + t.end(); + }); + + test('an iqr of a zero-length list produces null', function(t) { + t.equal(ss.iqr([]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/jenks.test.js b/perf/simple-statistics/test/jenks.test.js new file mode 100644 index 0000000..b4ee1d6 --- 
/dev/null +++ b/perf/simple-statistics/test/jenks.test.js @@ -0,0 +1,14 @@ +var test = require('tape'); +var ss = require('../'); + +test('jenks', function(t) { + test('will not try to assign more classes than datapoints', function(t) { + t.equal(ss.jenks([1, 2], 3), null); + t.end(); + }); + test('assigns correct breaks', function(t) { + t.deepEqual(ss.jenks([1, 2, 4, 5, 7, 9, 10, 20], 3), [1, 2, 5, 20]); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/linear_regression.test.js b/perf/simple-statistics/test/linear_regression.test.js new file mode 100644 index 0000000..7fcaf1c --- /dev/null +++ b/perf/simple-statistics/test/linear_regression.test.js @@ -0,0 +1,54 @@ +var test = require('tape'); +var ss = require('../'); + +test('linear regression', function(t) { + test('correctly generates a line for a 0, 0 to 1, 1 dataset', function(t) { + var l = ss.linear_regression().data([[0, 0], [1, 1]]); + t.equal(l.line()(0), 0); + t.equal(l.line()(0.5), 0.5); + t.equal(l.line()(1), 1); + t.end(); + }); + + test('correctly generates a line for a 0, 0 to 1, 0 dataset', function(t) { + var l = ss.linear_regression().data([[0, 0], [1, 0]]); + t.equal(l.line()(0), 0); + t.equal(l.line()(0.5), 0); + t.equal(l.line()(1), 0); + t.end(); + }); + + test('returns the data assigned to it', function(t) { + var l = ss.linear_regression().data([[0, 0], [1, 0]]); + t.deepEqual(l.data(), [[0, 0], [1, 0]]); + t.end(); + }); + + test('handles a single-point sample', function(t) { + var l = ss.linear_regression().data([[0, 0]]).line(); + t.deepEqual(l(10), 0); + t.end(); + }); + + test('a straight line will have a slope of 0', function(t) { + var l = ss.linear_regression().data([[0, 0], [1, 0]]); + t.equal(l.m(), 0); + t.equal(l.b(), 0); + t.end(); + }); + + test('a line at 50% grade', function(t) { + var l = ss.linear_regression().data([[0, 0], [1, 0.5]]); + t.equal(l.m(), 0.5); + t.equal(l.b(), 0); + t.end(); + }); + + test('a line with a high y-intercept', 
function(t) { + var l = ss.linear_regression().data([[0, 20], [1, 10]]); + t.equal(l.m(), -10); + t.equal(l.b(), 20); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/mad.test.js b/perf/simple-statistics/test/mad.test.js new file mode 100644 index 0000000..cb0abaa --- /dev/null +++ b/perf/simple-statistics/test/mad.test.js @@ -0,0 +1,26 @@ +var test = require('tape'); +var ss = require('../'); + +test('median absolute deviation (mad)', function(t) { + test('median absolute deviation of an example on wikipedia', function(t) { + t.equal(ss.mad([1, 1, 2, 2, 4, 6, 9]), 1); + t.end(); + }); + + // wolfram alpha: median absolute deviation {0,1,2,3,4,5,6,7,8,9,10} + test('median absolute deviation of 0-10', function(t) { + t.equal(ss.mad([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3); + t.end(); + }); + + test('median absolute deviation of one number is zero', function(t) { + t.equal(ss.mad([1]), 0); + t.end(); + }); + + test('zero-length corner case', function(t) { + t.equal(ss.mad([]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/mean.test.js b/perf/simple-statistics/test/mean.test.js new file mode 100644 index 0000000..6d054bd --- /dev/null +++ b/perf/simple-statistics/test/mean.test.js @@ -0,0 +1,18 @@ +var test = require('tape'); +var ss = require('../'); + +test('mean', function(t) { + test('can get the mean of two numbers', function(t) { + t.equal(ss.mean([1, 2]), 1.5); + t.end(); + }); + test('can get the mean of one number', function(t) { + t.equal(ss.mean([1]), 1); + t.end(); + }); + test('an empty list has no average', function(t) { + t.equal(ss.mean([]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/median.test.js b/perf/simple-statistics/test/median.test.js new file mode 100644 index 0000000..f11f560 --- /dev/null +++ b/perf/simple-statistics/test/median.test.js @@ -0,0 +1,38 @@ +var test = require('tape'); +var ss = require('../'); + +test('median', function(t) { + 
test('can get the median of three numbers', function(t) { + t.equal(ss.median([1, 2, 3]), 2); + t.end(); + }); + + test('can get the median of two numbers', function(t) { + t.equal(ss.median([1, 2]), 1.5); + t.end(); + }); + + test('can get the median of four numbers', function(t) { + t.equal(ss.median([1, 2, 3, 4]), 2.5); + t.end(); + }); + + test('gives null for the median of an empty list', function(t) { + t.equal(ss.median([]), null); + t.end(); + }); + + test('sorts numbers numerically', function(t) { + t.equal(ss.median([8, 9, 10]), 9); + t.end(); + }); + + test('does not change the sorting order of its input', function(t) { + var x = [1, 0]; + t.equal(ss.median(x), 0.5); + t.equal(x[0], 1); + t.equal(x[1], 0); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/minmax.test.js b/perf/simple-statistics/test/minmax.test.js new file mode 100644 index 0000000..32656f6 --- /dev/null +++ b/perf/simple-statistics/test/minmax.test.js @@ -0,0 +1,23 @@ +var test = require('tape'); +var ss = require('../'); + +test('min', function(t) { + test('can get the minimum of one number', function(t) { + t.equal(ss.min([1]), 1); + t.end(); + }); + + test('can get the minimum of three numbers', function(t) { + t.equal(ss.min([1, 7, -1000]), -1000); + t.end(); + }); + t.end(); +}); + +test('max', function(t) { + test('can get the maximum of three numbers', function(t) { + t.equal(ss.max([1, 7, -1000]), 7); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/mixin.test.js b/perf/simple-statistics/test/mixin.test.js new file mode 100644 index 0000000..0367c4b --- /dev/null +++ b/perf/simple-statistics/test/mixin.test.js @@ -0,0 +1,34 @@ +var test = require('tape'); +var ss = require('../'); + +test('mixin', function(t) { + test('can mix into a single array', function(t) { + var even = ss.mixin([2, 4, 6, 8]); + t.equal(even.sum(), 20); + t.equal(even.mean(), 5); + t.equal(even.max(), 8); + t.equal(even.min(), 2); + 
t.equal(even.sample_skewness(), 0); + t.end(); + }); + + test('can mix into Array.prototype', function(t) { + ss.mixin(); + var even = [2, 4, 6, 8]; + t.equal(even.sum(), 20); + t.equal(even.mean(), 5); + t.equal(even.max(), 8); + t.equal(even.min(), 2); + t.equal(even.sample_skewness(), 0); + t.end(); + }); + + test('mixins can take arguments', function(t) { + ss.mixin(); + var even = [2, 4, 6, 8]; + t.equal(even.quantile(0.2), 2); + t.equal(even.quantile(0.8), 8); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/mode.test.js b/perf/simple-statistics/test/mode.test.js new file mode 100644 index 0000000..1609ff4 --- /dev/null +++ b/perf/simple-statistics/test/mode.test.js @@ -0,0 +1,37 @@ +var test = require('tape'); +var ss = require('../'); + +test('mode', function(t) { + test('the mode of a single-number array is that one number', function(t) { + t.equal(ss.mode([1]), 1); + t.end(); + }); + + test('the mode of a two-number array is that one number', function(t) { + t.equal(ss.mode([1, 1]), 1); + t.end(); + }); + + test('other cases', function(t) { + t.equal(ss.mode([1, 1, 2]), 1); + t.equal(ss.mode([1, 1, 2, 3]), 1); + t.equal(ss.mode([1, 1, 2, 3, 3]), 1); + t.equal(ss.mode([1, 1, 2, 3, 3, 3]), 3); + t.equal(ss.mode([1, 1, 2, 2, 2, 2, 3, 3, 3]), 2); + t.equal(ss.mode([1, 2, 3, 4, 5]), 1); + t.equal(ss.mode([1, 2, 3, 4, 5, 5]), 5); + t.equal(ss.mode([1, 1, 1, 2, 2, 3, 3, 4, 4]), 1); + t.end(); + }); + + test('the mode of an empty array is null', function(t) { + t.equal(ss.mode([]), null); + t.end(); + }); + + test('the mode of a three-number array with two same numbers is the repeated one', function(t) { + t.equal(ss.mode([1, 2, 2]), 2); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/normal_distribution.test.js b/perf/simple-statistics/test/normal_distribution.test.js new file mode 100644 index 0000000..ab9b426 --- /dev/null +++ b/perf/simple-statistics/test/normal_distribution.test.js @@ -0,0 +1,60 @@ +var test 
= require('tape'); +var ss = require('../'); + +test('natural distribution and z-score', function(t) { + + test('normal table is exposed in the API', function(t) { + t.equal(ss.standard_normal_table.length, 310); + t.equal(ss.standard_normal_table[0], 0.5); + t.end(); + }); + + test('P(Z <= 0.4) is 0.6554', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + t.equal(ss.cumulative_std_normal_probability(0.4), 0.6554); + t.end(); + }); + + test('P(Z <= -1.20) is 0.1151', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + t.equal(ss.cumulative_std_normal_probability(-1.20), 0.1151); + t.end(); + }); + + test('P(X <= 82) when X ~ N (80, 25) is 0.6554', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + // A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5. + // What is the probability that a student scores an 82 or less? + t.equal(ss.cumulative_std_normal_probability(ss.z_score(82, 80, 5)), 0.6554); + t.end(); + }); + + test('P(X >= 90) when X ~ N (80, 25) is 0.0228', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + // A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5. + // What is the probability that a student scores a 90 or more? + t.equal(+(1 - ss.cumulative_std_normal_probability(ss.z_score(90, 80, 5))).toPrecision(5), 0.0228); + t.end(); + }); + + test('P(X <= 74) when X ~ N (80, 25) is 0.1151', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + // A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5. + // What is the probability that a student scores a 74 or less? 
+ t.equal(ss.cumulative_std_normal_probability(ss.z_score(74, 80, 5)), 0.1151); + t.end(); + }); + + test('P(78 <= X <= 88) when X ~ N (80, 25) is 0.6006', function(t) { + // Taken from the examples of use in http://en.wikipedia.org/wiki/Standard_normal_table + // A professor's exam scores are approximately distributed normally with mean 80 and standard deviation 5. + // What is the probability that a student scores between 78 and 88? + var prob88 = ss.cumulative_std_normal_probability(ss.z_score(88, 80, 5)), + prob78 = ss.cumulative_std_normal_probability(ss.z_score(78, 80, 5)); + + t.equal(+(prob88 - prob78).toPrecision(5), 0.6006); + t.end(); + }); + + t.end(); +}); diff --git a/perf/simple-statistics/test/poisson_distribution.test.js b/perf/simple-statistics/test/poisson_distribution.test.js new file mode 100644 index 0000000..40af493 --- /dev/null +++ b/perf/simple-statistics/test/poisson_distribution.test.js @@ -0,0 +1,37 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(n) { + return parseFloat(n.toFixed(4)); +} + +// expected cumulative probabilities taken from Appendix 1, Table I of William W. Hines & Douglas C. +// Montgomery, "Probability and Statistics in Engineering and Management Science", Wiley (1980). 
+test('poisson_distribution', function(t) { + test('can return generate probability and cumulative probability distributions for lambda = 3.0', function(t) { + t.equal('object', typeof ss.poisson_distribution(3.0)); + t.equal(rnd(ss.poisson_distribution(3.0)[3]), 0.2240, ss.epsilon); + t.end(); + }); + test('can generate probability and cumulative probability distributions for lambda = 4.0', function(t) { + t.equal('object', typeof ss.poisson_distribution(4.0)); + t.equal(rnd(ss.poisson_distribution(4.0)[2]), 0.1465, ss.epsilon); + t.end(); + }); + test('can generate probability and cumulative probability distributions for lambda = 5.5', function(t) { + t.equal('object', typeof ss.poisson_distribution(5.5)); + t.equal(rnd(ss.poisson_distribution(5.5)[7]), 0.1234, ss.epsilon); + t.end(); + }); + test('can generate probability and cumulative probability distributions for lambda = 9.5', function(t) { + t.equal('object', typeof ss.poisson_distribution(9.5)); + t.equal(rnd(ss.poisson_distribution(9.5)[17]), 0.0088, ss.epsilon); + t.end(); + }); + test('can return null when lambda <= 0', function(t) { + t.equal(null, ss.poisson_distribution(0)); + t.equal(null, ss.poisson_distribution(-10)); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/quantile.test.js b/perf/simple-statistics/test/quantile.test.js new file mode 100644 index 0000000..5b4f8f5 --- /dev/null +++ b/perf/simple-statistics/test/quantile.test.js @@ -0,0 +1,64 @@ +var test = require('tape'); +var ss = require('../'); + +test('quantile', function(t) { + // Data and results from + // [Wikipedia](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population) + test('can get proper quantiles of an even-length list', function(t) { + var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20]; + t.equal(ss.quantile(even, 0.25), 7); + t.equal(ss.quantile(even, 0.5), 9); + t.equal(ss.quantile(even, 0.75), 15); + t.end(); + }); + + test('can get proper quantiles of an odd-length list', function(t) { + 
var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20]; + t.equal(ss.quantile(odd, 0.25), 7); + t.equal(ss.quantile(odd, 0.5), 9); + t.equal(ss.quantile(odd, 0.75), 15); + t.end(); + }); + + test('the median quantile is equal to the median', function(t) { + var rand = [1, 4, 5, 8]; + t.equal(ss.quantile(rand, 0.5), ss.median(rand)); + var rand2 = [10, 50, 2, 4, 4, 5, 8]; + t.equal(ss.quantile(rand2, 0.5), ss.median(rand2)); + t.end(); + }); + + test('a zero-length list produces null', function(t) { + t.equal(ss.quantile([], 0.5), null); + t.end(); + }); + + test('test odd-value case', function(t) { + t.equal(ss.quantile([0, 1, 2, 3, 4], 0.2), 1); + t.end(); + }); + + test('bad bounds produce null', function(t) { + t.equal(ss.quantile([1, 2, 3], 1.1), null); + t.equal(ss.quantile([1, 2, 3], -0.5), null); + t.end(); + }); + + test('max quantile is equal to the max', function(t) { + t.equal(ss.quantile([1, 2, 3], 1), ss.max([1, 2, 3])); + t.end(); + }); + + test('min quantile is equal to the min', function(t) { + t.equal(ss.quantile([1, 2, 3], 0), ss.min([1, 2, 3])); + t.end(); + }); + + test('if quantile arg is an array, response is an array of quantiles', function(t) { + var odd = [3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20]; + t.deepEqual(ss.quantile(odd, [0, 0.25, 0.5, 0.75, 1]), [3, 7, 9, 15, 20]); + t.deepEqual(ss.quantile(odd, [0.75, 0.5]), [15, 9]); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/quantilesorted.test.js b/perf/simple-statistics/test/quantilesorted.test.js new file mode 100644 index 0000000..0a56d7b --- /dev/null +++ b/perf/simple-statistics/test/quantilesorted.test.js @@ -0,0 +1,15 @@ +var test = require('tape'); +var ss = require('../'); + +test('quantile_sorted', function(t) { + // Data and results from + // [Wikipedia](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population) + test('can get proper quantiles of an even-length list', function(t) { + var even = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20]; + 
t.equal(ss.quantile_sorted(even, 0.25), 7); + t.equal(ss.quantile_sorted(even, 0.5), 9); + t.equal(ss.quantile_sorted(even, 0.75), 15); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/r_squared.test.js b/perf/simple-statistics/test/r_squared.test.js new file mode 100644 index 0000000..062f9da --- /dev/null +++ b/perf/simple-statistics/test/r_squared.test.js @@ -0,0 +1,26 @@ +var test = require('tape'); +var ss = require('../'); + +test('r-squared', function(t) { + test('says that the r squared of a two-point line is perfect', function(t) { + var d = [[0, 0], [1, 1]]; + var l = ss.linear_regression().data(d); + t.equal(ss.r_squared(d, l.line()), 1); + t.end(); + }); + + test('says that the r squared of a three-point line is not perfect', function(t) { + var d = [[0, 0], [0.5, 0.2], [1, 1]]; + var l = ss.linear_regression().data(d); + t.notEqual(ss.r_squared(d, l.line()), 1); + t.end(); + }); + + test('r-squared of single sample is 1', function(t) { + var d = [[0, 0]]; + var l = ss.linear_regression().data(d); + t.equal(ss.r_squared(d, l.line()), 1); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/root_mean_square.test.js b/perf/simple-statistics/test/root_mean_square.test.js new file mode 100644 index 0000000..f4e7d2f --- /dev/null +++ b/perf/simple-statistics/test/root_mean_square.test.js @@ -0,0 +1,23 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('root_mean_square', function(t) { + // From http://en.wikipedia.org/wiki/Root_mean_square + test('can get the RMS of two or more numbers', function(t) { + t.equal(ss.root_mean_square([1, 1]), 1); + t.equal(rnd(ss.root_mean_square([3, 4, 5])), 4.082); + t.equal(rnd(ss.root_mean_square([-0.1, 5, -2, 10])), 5.679); + t.end(); + }); + + test('returns null for empty lists', function(t) { + t.equal(ss.root_mean_square([]), null); + t.end(); + }); + + t.end(); +}); diff --git 
a/perf/simple-statistics/test/sample.test.js b/perf/simple-statistics/test/sample.test.js new file mode 100644 index 0000000..0d21f12 --- /dev/null +++ b/perf/simple-statistics/test/sample.test.js @@ -0,0 +1,19 @@ +var test = require('tape'); +var Random = require('random-js'); +var random = new Random(Random.engines.mt19937().seed(0)); +var ss = require('../'); + +function rng() { return random.real(0, 1); } + +test('sample', function(t) { + t.deepEqual(ss.sample([], 0, rng), [], 'edge case - zero array'); + t.deepEqual(ss.sample([], 2, rng), [], 'edge case - zero array'); + t.deepEqual(ss.sample([1,2,3], 0, rng, 0), [], 'edge case - zero array'); + t.deepEqual(ss.sample([1,2,3], 1, rng), [1], 'edge case - sample of 1'); + t.deepEqual(ss.sample([1,2,3], 1, rng), [2]); + t.deepEqual(ss.sample([1,2,3], 3, rng), [2,3,1]); + t.deepEqual(ss.sample([1,2,3,4], 2, rng), [3,1]); + t.deepEqual(ss.sample([1,2,3,4,6,7,8], 2, rng), [8,7]); + t.deepEqual(ss.sample(['foo', 'bar'], 1, rng), ['foo'], 'non-number contents'); + t.end(); +}); diff --git a/perf/simple-statistics/test/sample_correlation.test.js b/perf/simple-statistics/test/sample_correlation.test.js new file mode 100644 index 0000000..5bac3db --- /dev/null +++ b/perf/simple-statistics/test/sample_correlation.test.js @@ -0,0 +1,29 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('sample correlation', function(t) { + + test('can get the sample correlation of identical arrays', function(t) { + var data = [1, 2, 3, 4, 5, 6]; + t.equal(rnd(ss.sample_correlation(data, data)), 1); + t.end(); + }); + + test('can get the sample correlation of different arrays', function(t) { + var a = [1, 2, 3, 4, 5, 6]; + var b = [2, 2, 3, 4, 5, 60]; + t.equal(rnd(ss.sample_correlation(a, b)), 0.691); + t.end(); + }); + + test('zero-length corner case', function(t) { + t.equal(rnd(ss.sample_correlation([], [])), 0); + t.end(); + }); + + t.end(); +}); diff --git 
a/perf/simple-statistics/test/sample_covariance.test.js b/perf/simple-statistics/test/sample_covariance.test.js new file mode 100644 index 0000000..046014f --- /dev/null +++ b/perf/simple-statistics/test/sample_covariance.test.js @@ -0,0 +1,34 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('sample covariance', function(t) { + test('can get perfect negative covariance', function(t) { + var x = [1, 2, 3, 4, 5, 6]; + var y = [6, 5, 4, 3, 2, 1]; + t.equal(rnd(ss.sample_covariance(x, y)), -3.5); + t.end(); + }); + + test('covariance of something with itself is its variance', function(t) { + var x = [1, 2, 3, 4, 5, 6]; + t.equal(rnd(ss.sample_covariance(x, x)), 3.5); + t.end(); + }); + + test('covariance is zero for something with no correlation', function(t) { + var x = [1, 2, 3, 4, 5, 6]; + var y = [1, 1, 2, 2, 1, 1]; + t.equal(rnd(ss.sample_covariance(x, y)), 0); + t.end(); + }); + + test('zero-length corner case', function(t) { + t.equal(rnd(ss.sample_covariance([], [])), 0); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/sample_skewness.test.js b/perf/simple-statistics/test/sample_skewness.test.js new file mode 100644 index 0000000..5a53575 --- /dev/null +++ b/perf/simple-statistics/test/sample_skewness.test.js @@ -0,0 +1,48 @@ +var test = require('tape'); +var ss = require('../'); + +test('sample skewness', function(t) { + + test('the skewness of an empty sample is null', function(t) { + var data = []; + t.equal(ss.sample_skewness(data), null); + t.end(); + }); + + test('the skewness of an sample with one number is null', function(t) { + var data = [1]; + t.equal(ss.sample_skewness(data), null); + t.end(); + }); + + test('the skewness of an sample with two numbers is null', function(t) { + var data = [1, 2]; + t.equal(ss.sample_skewness(data), null); + t.end(); + }); + + test('can calculate the skewness of SAS example 1', function(t) { + // Data and answer 
taken from SKEWNESS function documentation at + // http://support.sas.com/documentation/c../lrdict/64316/HTML/default/viewer.htm#a000245947.htm + var data = [0, 1, 1]; + t.equal(+ss.sample_skewness(data).toPrecision(10), -1.732050808); + t.end(); + }); + + test('can calculate the skewness of SAS example 2', function(t) { + // Data and answer taken from SKEWNESS function documentation at + // http://support.sas.com/documentation/c../lrdict/64316/HTML/default/viewer.htm#a000245947.htm + var data = [2, 4, 6, 3, 1]; + t.equal(+ss.sample_skewness(data).toPrecision(10), 0.5901286564); + t.end(); + }); + + test('can calculate the skewness of SAS example 3', function(t) { + // Data and answer taken from SKEWNESS function documentation at + // http://support.sas.com/documentation/c../lrdict/64316/HTML/default/viewer.htm#a000245947.htm + var data = [2, 0, 0]; + t.equal(+ss.sample_skewness(data).toPrecision(10), 1.732050808); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/sample_standard_deviation.test.js b/perf/simple-statistics/test/sample_standard_deviation.test.js new file mode 100644 index 0000000..d0b705e --- /dev/null +++ b/perf/simple-statistics/test/sample_standard_deviation.test.js @@ -0,0 +1,19 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('sample_standard_deviation', function(t) { + test('can get the standard deviation of an example on wikipedia', function(t) { + t.equal(rnd(ss.sample_standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])), 2.138); + t.end(); + }); + + test('zero-length corner case', function(t) { + t.equal(rnd(ss.sample_standard_deviation([])), 0); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/sample_variance.test.js b/perf/simple-statistics/test/sample_variance.test.js new file mode 100644 index 0000000..9e206c7 --- /dev/null +++ b/perf/simple-statistics/test/sample_variance.test.js @@ -0,0 +1,38 @@ +var test = 
require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('sample variance', function(t) { + test('can get the sample variance of a six-sided die', function(t) { + t.equal(rnd(ss.sample_variance([1, 2, 3, 4, 5, 6])), 3.5); + t.end(); + }); + + // confirmed in R + // + // > var(1:10) + // [1] 9.166667 + test('can get the sample variance of numbers 1-10', function(t) { + t.equal(rnd(ss.sample_variance([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), 9.167); + t.end(); + }); + + test('the sample variance of two numbers that are the same is 0', function(t) { + t.equal(rnd(ss.sample_variance([1, 1])), 0); + t.end(); + }); + + test('the sample variance of one number is null', function(t) { + t.equal(ss.sample_variance([1]), null); + t.end(); + }); + + test('the sample variance of no numbers is null', function(t) { + t.equal(ss.sample_variance([]), null); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/shuffle.test.js b/perf/simple-statistics/test/shuffle.test.js new file mode 100644 index 0000000..0a3bd2b --- /dev/null +++ b/perf/simple-statistics/test/shuffle.test.js @@ -0,0 +1,24 @@ +var test = require('tape'); +var Random = require('random-js'); +var random = new Random(Random.engines.mt19937().seed(0)); +var ss = require('../'); + +function rng() { return random.real(0, 1); } + +test('shuffle', function(t) { + var input = [1, 2, 3, 4, 5, 6]; + t.deepEqual(ss.shuffle([], rng), []); + t.deepEqual(ss.shuffle(input, rng), [1, 5, 3, 2, 4, 6]); + t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array'); + t.deepEqual(ss.shuffle(input, rng), [5, 4, 1, 3, 6, 2]); + t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array'); + t.end(); +}); + +test('shuffle_in_place', function(t) { + var input = [1, 2, 3, 4, 5, 6]; + t.deepEqual(ss.shuffle_in_place([], rng), []); + t.deepEqual(ss.shuffle_in_place(input, rng), [6, 1, 5, 2, 4, 3]); + t.deepEqual(input, [6, 1, 5, 2, 4, 3], 'changes 
original array'); + t.end(); +}); diff --git a/perf/simple-statistics/test/standard_deviation.test.js b/perf/simple-statistics/test/standard_deviation.test.js new file mode 100644 index 0000000..ad4c20e --- /dev/null +++ b/perf/simple-statistics/test/standard_deviation.test.js @@ -0,0 +1,39 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('standard_deviation', function(t) { + test('can get the standard deviation of an example on wikipedia', function(t) { + t.equal(rnd(ss.standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])), 2); + t.end(); + }); + + // confirmed with numpy + // In [4]: numpy.std([1,2,3]) + // Out[4]: 0.81649658092772603 + test('can get the standard deviation of 1-3', function(t) { + t.equal(rnd(ss.standard_deviation([1, 2, 3])), 0.816); + t.end(); + }); + + test('zero-length array corner case', function(t) { + t.equal(rnd(ss.standard_deviation([])), 0); + t.end(); + }); + + // In [6]: numpy.std([0,1,2,3,4,5,6,7,8,9,10]) + // Out[6]: 3.1622776601683795 + test('can get the standard deviation of 1-10', function(t) { + t.equal(rnd(ss.standard_deviation([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])), 3.162); + t.end(); + }); + + test('the standard deviation of one number is zero', function(t) { + t.equal(rnd(ss.standard_deviation([1])), 0); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/standard_normal_table.js b/perf/simple-statistics/test/standard_normal_table.js new file mode 100644 index 0000000..c217b07 --- /dev/null +++ b/perf/simple-statistics/test/standard_normal_table.js @@ -0,0 +1,14 @@ +var test = require('tape'); +var ss = require('../'); + +test('standard_normal_table', function(t) { + test('all entries are numeric', function(t) { + for (var i = 0; i < ss.standard_normal_table.length; i++) { + t.equal(typeof ss.standard_normal_table[i], 'number'); + t.ok(ss.standard_normal_table[i] >= 0); + t.ok(ss.standard_normal_table[i] <= 1); + } + t.end(); + }); + 
t.end(); +}); diff --git a/perf/simple-statistics/test/sum.test.js b/perf/simple-statistics/test/sum.test.js new file mode 100644 index 0000000..0be7d67 --- /dev/null +++ b/perf/simple-statistics/test/sum.test.js @@ -0,0 +1,15 @@ +var test = require('tape'); +var ss = require('../'); + +test('sum', function(t) { + test('can get the sum of two numbers', function(t) { + t.equal(ss.sum([1, 2]), 3); + t.end(); + }); + + test('the sum of no numbers is zero', function(t) { + t.equal(ss.sum([]), 0); + t.end(); + }); + t.end(); +}); diff --git a/perf/simple-statistics/test/t_test.test.js b/perf/simple-statistics/test/t_test.test.js new file mode 100644 index 0000000..daf8e3d --- /dev/null +++ b/perf/simple-statistics/test/t_test.test.js @@ -0,0 +1,38 @@ +var test = require('tape'), + ss = require('../'); + +test('t test', function(t) { + + test('can compare a known value to the mean of samples', function(t) { + var res = ss.t_test([1, 2, 3, 4, 5, 6], 3.385); + t.equal(res, 0.1649415480881466); + t.end(); + }); + + test('can test independency of two samples', function(t) { + var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6], 0); + t.equal(res, -2.1908902300206643); + t.end(); + }); + + test('can test independency of two samples (mu == -2)', function(t) { + var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6], -2); + t.equal(res, 0); + t.end(); + }); + + test('can test independency of two samples of different lengths', function(t) { + var res = ss.t_test_two_sample([1, 2, 3, 4], [3, 4, 5, 6, 1, 2, 0]); + t.equal(res, -0.4165977904505309); + t.end(); + }); + + test('has an edge case for one sample being of size zero', function(t) { + t.equal(ss.t_test_two_sample([1, 2, 3, 4], []), null); + t.equal(ss.t_test_two_sample([], [1, 2, 3, 4]), null); + t.equal(ss.t_test_two_sample([], []), null); + t.end(); + }); + + t.end(); +}); diff --git a/perf/simple-statistics/test/variance.test.js b/perf/simple-statistics/test/variance.test.js new file mode 100644 index 
0000000..5328341 --- /dev/null +++ b/perf/simple-statistics/test/variance.test.js @@ -0,0 +1,24 @@ +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('variance', function(t) { + test('can get the variance of a six-sided die', function(t) { + t.equal(rnd(ss.variance([1, 2, 3, 4, 5, 6])), 2.917); + t.end(); + }); + + test('the variance of one number is zero', function(t) { + t.equal(rnd(ss.variance([1])), 0); + t.end(); + }); + + test('the variance of no numbers is null', function(t) { + t.equal(ss.variance([]), null); + t.end(); + }); + t.end(); +}); diff --git a/tests/perf/index.js b/tests/perf/index.js deleted file mode 100644 index dbebd86..0000000 --- a/tests/perf/index.js +++ /dev/null @@ -1,119 +0,0 @@ -function setImmediate(cb) { - setTimeout(cb, 0); -} - -function parse_query() { - var query = window.location.search.substring(1); - var parsed = {}; - query.split('&').forEach(function(pair) { - pair = pair.split('='); - var key = decodeURIComponent(pair[0]); - var value = decodeURIComponent(pair[1]); - parsed[key] = value; - }); - return parsed; -} - -require(["Filer", "util"], function(Filer, util) { - - function time(test, cb) { - var start = performance.now(); - function done() { - var end = performance.now(); - cb(end - start); - } - test(done); - } - - var random_data = new Uint8Array(1024); // 1kB buffer - var read_buffer = new Uint8Array(1024); - - function run(iter) { - iter = (undefined == iter) ? 
0 : iter; - - function before() { - util.setup(function() { - nextTick(during); - }); - } - - function during() { - var fs = util.fs(); - - window.crypto.getRandomValues(random_data); - time(function(done) { - fs.mkdir('/tmp', function(err) { - fs.stat('/tmp', function(err, stats) { - fs.open('/tmp/test', 'w', function(err, fd) { - fs.write(fd, random_data, null, null, null, function(err, nbytes) { - fs.close(fd, function(err) { - fs.stat('/tmp/test', function(err, stats) { - fs.open('/tmp/test', 'r', function(err, fd) { - fs.read(fd, read_buffer, null, null, null, function(err, nbytes) { - fs.close(fd, function(err) { - fs.unlink('/tmp/test', function(err) { - done(); - });});});});});});});});});}); - }, after); - } - - function after(dt) { - util.cleanup(complete.bind(null, iter, dt)); - } - - before(); - } - - var results = []; - function complete(iter, result) { - results.push(result); - - if(++iter < iterations) { - nextTick(run.bind(null, iter)); - } else { - do_stats(); - } - - progress.value = iter; - } - - function do_stats() { - var output = document.getElementById("output"); - var stats = { - mean: ss.mean(results) + " ms", - min: ss.min(results), - max: ss.max(results), - med_abs_dev: ss.median_absolute_deviation(results), - }; - - var t = document.createElement("table"); - var tbody = document.createElement("tbody"); - var keys = Object.keys(stats); - keys.forEach(function(key) { - var row = document.createElement("tr"); - - var key_cell = document.createElement("td"); - var key_cell_text = document.createTextNode(key); - key_cell.appendChild(key_cell_text); - row.appendChild(key_cell); - - var val_cell = document.createElement("td"); - var val_cell_text = document.createTextNode(stats[key]); - val_cell.appendChild(val_cell_text); - row.appendChild(val_cell); - - tbody.appendChild(row); - }); - - t.appendChild(tbody); - output.appendChild(t); - } - - var query = parse_query(); - var iterations = query.iterations || 10; - var progress = 
document.getElementById("progress"); - progress.max = iterations; - - run(); - -}); diff --git a/tests/perf/perf-test.html b/tests/perf/perf-test.html deleted file mode 100644 index dcd7b4c..0000000 --- a/tests/perf/perf-test.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - -
-
- - - - - - - -