/* global module */
diff --git a/perf/index.html b/perf/index.html new file mode 100644 index 0000000..7ca17db --- /dev/null +++ b/perf/index.html @@ -0,0 +1,13 @@ + + +
+ + + + + + + + + + diff --git a/perf/index.js b/perf/index.js new file mode 100644 index 0000000..f7d9354 --- /dev/null +++ b/perf/index.js @@ -0,0 +1,120 @@ +var Filer = require('..'); +var util = require('../tests/lib/test-utils.js'); + +function setImmediate(cb) { + setTimeout(cb, 0); +} + +function parse_query() { + var query = window.location.search.substring(1); + var parsed = {}; + query.split('&').forEach(function(pair) { + pair = pair.split('='); + var key = decodeURIComponent(pair[0]); + var value = decodeURIComponent(pair[1]); + parsed[key] = value; + }); + return parsed; +} + +var query = parse_query(); + +function time(test, cb) { + var start = performance.now(); + function done() { + var end = performance.now(); + cb(end - start); + } + test(done); +} + +var random_data = new Buffer(1024); // 1kB buffer +var read_buffer = new Buffer(1024); + +function run(iter) { + iter = (undefined == iter) ? 0 : iter; + + function before() { + util.setup(function() { + setImmediate(during); + }); + } + + function during() { + var fs = util.fs(); + + window.crypto.getRandomValues(random_data); + time(function(done) { + fs.mkdir('/tmp', function(err) { + fs.stat('/tmp', function(err, stats) { + fs.open('/tmp/test', 'w', function(err, fd) { + fs.write(fd, random_data, null, null, null, function(err, nbytes) { + fs.close(fd, function(err) { + fs.stat('/tmp/test', function(err, stats) { + fs.open('/tmp/test', 'r', function(err, fd) { + fs.read(fd, read_buffer, null, null, null, function(err, nbytes) { + fs.close(fd, function(err) { + fs.unlink('/tmp/test', function(err) { + done(); + });});});});});});});});});}); + }, after); + } + + function after(dt) { + util.cleanup(complete.bind(null, iter, dt)); + } + + before(); +} + +var results = []; +function complete(iter, result) { + results.push(result); + + if(++iter < iterations) { + setImmediate(run.bind(null, iter)); + } else { + do_stats(); + } + + progress.value = iter; +} + +function do_stats() { + var output = document.getElementById("output"); + var stats = { + mean: ss.mean(results) + " ms", + min: ss.min(results), + max: ss.max(results), + med_abs_dev: ss.median_absolute_deviation(results), + }; + + var t = document.createElement("table"); + var tbody = document.createElement("tbody"); + var keys = Object.keys(stats); + keys.forEach(function(key) { + var row = document.createElement("tr"); + + var key_cell = document.createElement("td"); + var key_cell_text = document.createTextNode(key); + key_cell.appendChild(key_cell_text); + row.appendChild(key_cell); + + var val_cell = document.createElement("td"); + var val_cell_text = document.createTextNode(stats[key]); + val_cell.appendChild(val_cell_text); + row.appendChild(val_cell); + + tbody.appendChild(row); + }); + + t.appendChild(tbody); + output.appendChild(t); +} + +var query = parse_query(); +var iterations = query.iterations || 10; +var progress = document.getElementById("progress"); +progress.max = iterations; + +run(); diff --git a/perf/simple-statistics/.gitignore b/perf/simple-statistics/.gitignore new file mode 100644 index 0000000..aa6fd7c --- /dev/null +++ b/perf/simple-statistics/.gitignore @@ -0,0 +1,3 @@ +components +build +node_modules \ No newline at end of file diff --git a/perf/simple-statistics/.jshintrc b/perf/simple-statistics/.jshintrc new file mode 100644 index 0000000..4891ee5 --- /dev/null +++ b/perf/simple-statistics/.jshintrc @@ -0,0 +1,8 @@ +{ + "indent": 4, + "undef": true, + "unused": true, + "globals": { + "require": true + } +} diff --git a/perf/simple-statistics/.travis.yml b/perf/simple-statistics/.travis.yml new file mode 100644 index 0000000..1d867b9 --- /dev/null +++ b/perf/simple-statistics/.travis.yml @@ -0,0 +1,7 @@ +language: node_js +node_js: + - 0.10 +script: + - npm install + - npm test + - npm run cov diff --git a/perf/simple-statistics/API.md b/perf/simple-statistics/API.md new file mode 100644 index 0000000..7936bc6 --- /dev/null +++ b/perf/simple-statistics/API.md @@ -0,0 +1,242 @@ +Basic contracts of functions: + +* Functions do not modify their arguments e.g. change their order +* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`. + +# Basic Array Operations + +### .mixin(array) + +_Optionally_ mix in the following functions into the `Array` prototype. Otherwise +you can use them off of the simple-statistics object itself. + +If given a particular array instance as an argument, this adds the functions +only to that array rather than the global `Array.prototype`. Without an argument, +it runs on the global `Array.prototype`. + +### .mean(x) + +Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_ + +### .sum(x) + +Sum of a single-dimensional Array of numbers. + +### .mode(x) + +Returns the number that appears most frequently in a single-dimensional Array +of numbers. If there are multiple modes, the one that appears last +is returned. + +### .variance(x) + +[Variance](http://en.wikipedia.org/wiki/Variance) of a single-dimensional Array of numbers. + +### .standard_deviation(x) + +[Standard Deviation](http://en.wikipedia.org/wiki/Standard_deviation) of a single-dimensional Array of numbers. + +### .sample(array, n) + +Return a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample) +of the given array. The sampling is _without replacement_, and uses a Fisher-Yates +sample to randomize. + +### .median_absolute_deviation(x) + +The Median Absolute Deviation (MAD) is a robust measure of statistical +dispersion. It is more resilient to outliers than the standard deviation. +Accepts a single-dimensional array of numbers and returns a dispersion value. + +Also aliased to `.mad(x)` for brevity. + +### .median(x) + +[Median](http://en.wikipedia.org/wiki/Median) of a single-dimensional array of numbers. + +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. + +### .harmonic_mean(x) + +[Harmonic mean](http://en.wikipedia.org/wiki/Harmonic_mean) of a single-dimensional array of **positive** numbers. + +### .root_mean_square(x) + +[Root mean square (RMS)](http://en.wikipedia.org/wiki/Root_mean_square) of a single-dimensional array of numbers. + +Also aliased to `.rms(x)` for brevity. + +### .min(x) + +Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .max(x) + +Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .t_test(sample, x) + +Does a [student's t-test](http://en.wikipedia.org/wiki/Student's_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](http://en.wikipedia.org/wiki/Statistical_significance). + +### .t_test_two_sample(sample_x, sample_y, difference) + +The two-sample t-test is used to compare samples from two populations or groups, +confirming or denying the suspicion (null hypothesis) that the populations are +the same. It returns a t-value that you can then look up to give certain +judgements of confidence based on a t distribution table. + +This implementation expects the samples `sample_x` and `sample_y` to be given +as one-dimensional arrays of more than one number each. + +### .sample_variance(x) + +Produces [sample variance](http://mathworld.wolfram.com/SampleVariance.html) +of a single-dimensional array of numbers. + +### .sample_covariance(a, b) + +Produces [sample covariance](http://en.wikipedia.org/wiki/Sample_mean_and_sample_covariance) +of two single-dimensional arrays of numbers. + +### .sample_correlation(a, b) + +Produces [sample correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) +of two single-dimensional arrays of numbers. + +### .quantile(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. For those familiary with the `k/q` syntax, `p == k/q`. `sample` must +be a single-dimensional array of numbers. p must be a number greater than or equal to +than zero and less or equal to than one, or an array of numbers following that rule. +If an array is given, an array of results will be returned instead of a single +number. + +### .chunk(sample, chunkSize) + +Given a `sample` array, and a positive integer `chunkSize`, splits an array +into chunks of `chunkSize` size and returns an array of those chunks. This +does not change the input value. If the length of `sample` is not divisible +by `chunkSize`, the last array will be shorter than the rest. + +### .shuffle(sample) + +Given a `sample` array (with any type of contents), return a random permutation +of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) +algorithm. + +### .shuffle_in_place(sample) + +Given a `sample` array (with any type of contents), return a random permutation +of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle) +algorithm. + +This changes the input array in-place, as well as returns it - unlike `.shuffle()`, +it does not create a shallow copy of the array. + +### .quantile_sorted(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. `sample` must be a one-dimensional _sorted_ array of numbers, and +`p` must be a single number from zero to one. + +### .iqr(sample) + +Calculates the [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) of +a sample - the difference between the upper and lower quartiles. Useful +as a measure of dispersion. + +_Also available as `.interquartile_range(x)`_ + +### .sample_skewness(sample) + +Calculates the [skewness](http://en.wikipedia.org/wiki/Skewness) of +a sample, a measure of the extent to which a probability distribution of a +real-valued random variable "leans" to one side of the mean. +The skewness value can be positive or negative, or even undefined. + +This implementation uses the [Fisher-Pearson standardized moment coefficient](http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients), +which means that it behaves the same as Excel, Minitab, SAS, and SPSS. + +Skewness is only valid for samples of over three values. + +### .jenks(data, number_of_classes) + +Find the [Jenks Natural Breaks](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) for +a single-dimensional array of numbers as input and a desired `number_of_classes`. +The result is a single-dimensional with class breaks, including the minimum +and maximum of the input array. + +### .r_squared(data, function) + +Find the [r-squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`. + + var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; }); + +### .cumulative_std_normal_probability(z) + +Look up the given `z` value in a [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) +to calculate the probability of a random variable appearing with a given value. + +### .z_score(x, mean, standard_deviation) + +The standard score is the number of standard deviations an observation +or datum is above or below the mean. + +### .standard_normal_table + +A [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) from +which to pull values of Φ (phi). + +## Regression + +### .linear_regression() + +Create a new linear regression solver. + +#### .data([[1, 1], [2, 2]]) + +Set the data of a linear regression. The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`. + +#### .line() + +Get the linear regression line: this returns a function that you can +give `x` values and it will return `y` values. Internally, this uses the `m()` +and `b()` values and the classic `y = mx + b` equation. + + var linear_regression_line = ss.linear_regression() + .data([[0, 1], [2, 2], [3, 3]]).line(); + linear_regression_line(5); + +#### .m() + +Just get the slope of the fitted regression line, the `m` component of the full +line equation. Returns a number. + +#### .b() + +Just get the y-intercept of the fitted regression line, the `b` component +of the line equation. Returns a number. + +## Classification + +### .bayesian() + +Create a naïve bayesian classifier. + +### .train(item, category) + +Train the classifier to classify a certain item, given as an object with keys, +to be in a certain category, given as a string. + +### .score(item) + +Get the classifications of a certain item, given as an object of +`category -> score` mappings. + + var bayes = ss.bayesian(); + bayes.train({ species: 'Cat' }, 'animal'); + bayes.score({ species: 'Cat' }); + // { animal: 1 } diff --git a/perf/simple-statistics/CHANGELOG.md b/perf/simple-statistics/CHANGELOG.md new file mode 100644 index 0000000..f354935 --- /dev/null +++ b/perf/simple-statistics/CHANGELOG.md @@ -0,0 +1,60 @@ +# CHANGELOG + +## 0.9.0 + +* Adds `.sample` for simple random sampling +* Adds `.shuffle` and `.shuffle_in_place` for random permutations +* Adds `.chunk` for splitting arrays into chunked subsets + +## 0.8.1 + +* fixes a bug in `mode` that favored the last new number + +## 0.8.0 + +* `mixin` can now take an array in order to mixin functions into a single array + instance rather than the global Array prototype. + +## 0.7.0 + +* Adds `simple_statistics.harmonic_mean` thanks to [jseppi](https://github.com/jseppi) + +## 0.6.0 + +* Adds `simple_statistics.quantile_sorted` thanks to [rluta](http://github.com/rluta) +* `simple_statistics.quantile` now accepts a sorted list of quantiles as a second argument +* Improved test coverage + +## 0.5.0 + +* Adds `simple_statistics.cumulative_std_normal_probability` by [doronlinder](https://github.com/doronlinder) +* Adds `simple_statistics.z_score` by doronlinder +* Adds `simple_statistics.standard_normal_table` + +## 0.4.0 + +* Adds `simple_statistics.median_absolute_deviation()` by siculars +* Adds `simple_statistics.iqr()` by siculars +* Adds `simple_statistics.skewness()` by Doron Linder +* Lower-level accessors for linear regression allow users to do the line + equation themselves + +## 0.3.0 + +* Adds `simple_statistics.jenks()` +* Adds `simple_statistics.jenksMatrices()` +* Improves test coverage and validation + +## 0.2.0 + +* Adds `simple_statistics.quantile()` +* Adds `simple_statistics.mixin()` +* Adds `simple_statistics.geometric_mean()` +* Adds `simple_statistics.sample_variance()` +* Adds `simple_statistics.sample_covariance()` + +## 0.1.0 + +* Adds `simple_statistics.t_test()` +* Adds `simple_statistics.min()` +* Adds `simple_statistics.max()` diff --git a/perf/simple-statistics/CONTRIBUTING.md b/perf/simple-statistics/CONTRIBUTING.md new file mode 100644 index 0000000..eebdf1f --- /dev/null +++ b/perf/simple-statistics/CONTRIBUTING.md @@ -0,0 +1,99 @@ +# Contributing to simple-statistics + +Simple statistics is a statistics library that can be both used and read. +It should help programmers learn statistics and statisticians learn programming. +In order to achieve this goal, it must be **simple** and **explanatory**. + +## Simple + +`simple-statistics` is written in a subset of JavaScript. Unused features +include: + +* [Conditional Operator](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Conditional_Operator) +* [ES5 Array methods](http://ie.microsoft.com/TestDrive/HTML5/ECMAScript5Array/Default.html) +* `with`, `eval`, and other forms of `eval` +* Most micro-optimizations, like [alternative for loop forms](http://jsperf.com/loops/70) +* [Shortcut branching](http://javascriptweblog.wordpress.com/2010/07/26/no-more-ifs-alternatives-to-statement-branching-in-javascript/) + +## Explanatory + +Example: + +```js +// # harmonic mean +// +// a mean function typically used to find the average of rates +// +// this is the reciprocal of the arithmetic mean of the reciprocals +// of the input numbers +// +// This runs on `O(n)`, linear time in respect to the array +``` + +`simple-statistics` tries to stay away from speaking only in the language of math: +for instance, while JavaScript supports UTF8 characters like π, they are not used +in the source: + +* UTF8 in JavaScript on pages without specific meta-tag or Content-Type encodings will fail +* UTF8 can be hard to type, since users need to memorize key combinations or code points +* Mathematical symbols have meanings that are often better communicated by words: + in the form of code, we do not run out of space on the paper, and can afford + to call a variable `reciprocal_sum` instead of `r`. + +Every function has a comment that ideally includes: + +* The English, long-form name of the method +* What the method does +* What purpose the method typically serves +* A link to a longer description on Wikipedia, Mathematica, or another + web-accessible, non-paywalled source +* The efficiency of the function in terms of Big-O notation, if appropriate +* If the function depends on another function in the library, a note of this, like + `depends on mean()` + +## Tests + +`simple-statistics` has a testsuite located in `test/spec/`. Each test file +covers a specific topic and tries to test against known values: + +* Values produced by trusted statistics software like R or scipy +* Common-sense results + +Tests can be run in [node.js](http://nodejs.org/) and are run on every commit +to GitHub by Travis-CI. + +To run tests: + +```sh +npm install +npm test +``` + +## Documentation + +While the code is meant to readable, it is not documentation. We maintain +documentation in `API.md`, which has the simple form: + +```md +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. +``` + +This file is written in [Markdown](https://daringfireball.net/projects/markdown/) and +specifies which functions are available, what type of arguments they receive, +what they compute, and what type of answer they return. + +## Code Style + +We use the [Airbnb style for Javascript](https://github.com/airbnb/javascript) with +only one difference: + +**4 space soft tabs always for Javascript, not 2.** + +No aligned `=`, no aligned arguments, spaces are either indents or the 1 +space between expressions. No hard tabs. + +* All comparisons should be as strict and obvious as possible: prefer `(foo === 0)` to + `(!foo)`. +* Straightforward code is more important than most optimizations. diff --git a/perf/simple-statistics/LICENSE b/perf/simple-statistics/LICENSE new file mode 100644 index 0000000..13a2f78 --- /dev/null +++ b/perf/simple-statistics/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014, Tom MacWright + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/perf/simple-statistics/Makefile b/perf/simple-statistics/Makefile new file mode 100644 index 0000000..f497307 --- /dev/null +++ b/perf/simple-statistics/Makefile @@ -0,0 +1,7 @@ +docs: + docco src/*.js + +test: + mocha -R spec test/spec/*.js + +.PHONY: docs test diff --git a/perf/simple-statistics/README.md b/perf/simple-statistics/README.md new file mode 100644 index 0000000..f3587ba --- /dev/null +++ b/perf/simple-statistics/README.md @@ -0,0 +1,337 @@ +[![Build Status](https://secure.travis-ci.org/tmcw/simple-statistics.png?branch=master)](http://travis-ci.org/tmcw/simple-statistics) [![Coverage Status](https://coveralls.io/repos/tmcw/simple-statistics/badge.png)](https://coveralls.io/r/tmcw/simple-statistics) + +A JavaScript implementation of descriptive, regression, and inference statistics. + +Implemented in literate JavaScript with no dependencies, designed to work +in all modern browsers (including IE) as well as in node.js. + +## [API Documentation](API.md) + +--- + +Basic contracts of functions: + +* Functions do not modify their arguments e.g. change their order +* Invalid input, like empty lists to functions that need 1+ items to work, will cause functions to return `null`. + +# Basic Array Operations + +### .mixin(array) + +_Optionally_ mix in the following functions into the `Array` prototype. Otherwise +you can use them off of the simple-statistics object itself. + +If given a particular array instance as an argument, this adds the functions +only to that array rather than the global `Array.prototype`. Without an argument, +it runs on the global `Array.prototype`. + +### .mean(x) + +Mean of a single-dimensional Array of numbers. _Also available as `.average(x)`_ + +### .sum(x) + +Sum of a single-dimensional Array of numbers. + +### .mode(x) + +Returns the number that appears most frequently in a single-dimensional Array +of numbers. If there are multiple modes, the one that appears last +is returned. + +### .variance(x) + +[Variance](http://en.wikipedia.org/wiki/Variance) of a single-dimensional Array of numbers. + +### .standard_deviation(x) + +[Standard Deviation](http://en.wikipedia.org/wiki/Standard_deviation) of a single-dimensional Array of numbers. + +### .median_absolute_deviation(x) + +The Median Absolute Deviation (MAD) is a robust measure of statistical +dispersion. It is more resilient to outliers than the standard deviation. +Accepts a single-dimensional array of numbers and returns a dispersion value. + +Also aliased to `.mad(x)` for brevity. + +### .median(x) + +[Median](http://en.wikipedia.org/wiki/Median) of a single-dimensional array of numbers. + +### .geometric_mean(x) + +[Geometric mean](http://en.wikipedia.org/wiki/Geometric_mean) of a single-dimensional array of **positive** numbers. + +### .harmonic_mean(x) + +[Harmonic mean](http://en.wikipedia.org/wiki/Harmonic_mean) of a single-dimensional array of **positive** numbers. + +### .root_mean_square(x) + +[Root mean square (RMS)](http://en.wikipedia.org/wiki/Root_mean_square) of a single-dimensional array of numbers. + +### .min(x) + +Finds the minimum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .max(x) + +Finds the maximum of a single-dimensional array of numbers. This runs in linear `O(n)` time. + +### .t_test(sample, x) + +Does a [student's t-test](http://en.wikipedia.org/wiki/Student's_t-test) of a dataset `sample`, represented by a single-dimensional array of numbers. `x` is the known value, and the result is a measure of [statistical significance](http://en.wikipedia.org/wiki/Statistical_significance). + +### .t_test_two_sample(sample_x, sample_y, difference) + +The two-sample t-test is used to compare samples from two populations or groups, +confirming or denying the suspicion (null hypothesis) that the populations are +the same. It returns a t-value that you can then look up to give certain +judgements of confidence based on a t distribution table. + +This implementation expects the samples `sample_x` and `sample_y` to be given +as one-dimensional arrays of more than one number each. + +### .sample_variance(x) + +Produces [sample variance](http://mathworld.wolfram.com/SampleVariance.html) +of a single-dimensional array of numbers. + +### .sample_covariance(a, b) + +Produces [sample covariance](http://en.wikipedia.org/wiki/Sample_mean_and_sample_covariance) +of two single-dimensional arrays of numbers. + +### .sample_correlation(a, b) + +Produces [sample correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) +of two single-dimensional arrays of numbers. + +### .quantile(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. For those familiary with the `k/q` syntax, `p == k/q`. `sample` must +be a single-dimensional array of numbers. p must be a number greater than or equal to zero and less than or equal to one, or an array of numbers following that rule. +If an array is given, an array of results will be returned instead of a single +number. + +### .chunk(sample, chunkSize) + +Given a `sample` array, and a positive integer `chunkSize`, splits an array +into chunks of `chunkSize` size and returns an array of those chunks. This +does not change the input value. If the length of `sample` is not divisible +by `chunkSize`, the last array will be shorter than the rest. + +### .quantile_sorted(sample, p) + +Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`, +at p. `sample` must be a one-dimensional _sorted_ array of numbers, and +`p` must be a single number greater than or equal to zero and less than or equal to one. + +### .iqr(sample) + +Calculates the [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) of +a sample - the difference between the upper and lower quartiles. Useful +as a measure of dispersion. + +_Also available as `.interquartile_range(x)`_ + +### .sample_skewness(sample) + +Calculates the [skewness](http://en.wikipedia.org/wiki/Skewness) of +a sample, a measure of the extent to which a probability distribution of a +real-valued random variable "leans" to one side of the mean. +The skewness value can be positive or negative, or even undefined. + +This implementation uses the [Fisher-Pearson standardized moment coefficient](http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients), +which means that it behaves the same as Excel, Minitab, SAS, and SPSS. + +Skewness is only valid for samples of over three values. + +### .jenks(data, number_of_classes) + +Find the [Jenks Natural Breaks](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) for +a single-dimensional array of numbers as input and a desired `number_of_classes`. +The result is a single-dimensional with class breaks, including the minimum +and maximum of the input array. + +### .r_squared(data, function) + +Find the [r-squared](http://en.wikipedia.org/wiki/Coefficient_of_determination) value of a particular dataset, expressed as a two-dimensional `Array` of numbers, against a `Function`. + + var r_squared = ss.r_squared([[1, 1]], function(x) { return x * 2; }); + +### .cumulative_std_normal_probability(z) + +Look up the given `z` value in a [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) +to calculate the probability of a random variable appearing with a given value. + +### .z_score(x, mean, standard_deviation) + +The standard score is the number of standard deviations an observation +or datum is above or below the mean. + +### .standard_normal_table + +A [standard normal table](http://en.wikipedia.org/wiki/Standard_normal_table) from +which to pull values of Φ (phi). + +## Regression + +### .linear_regression() + +Create a new linear regression solver. + +#### .data([[1, 1], [2, 2]]) + +Set the data of a linear regression. The input is a two-dimensional array of numbers, which are treated as coordinates, like `[[x, y], [x1, y1]]`. + +#### .line() + +Get the linear regression line: this returns a function that you can +give `x` values and it will return `y` values. Internally, this uses the `m()` +and `b()` values and the classic `y = mx + b` equation. + + var linear_regression_line = ss.linear_regression() + .data([[0, 1], [2, 2], [3, 3]]).line(); + linear_regression_line(5); + +#### .m() + +Just get the slope of the fitted regression line, the `m` component of the full +line equation. Returns a number. + +#### .b() + +Just get the y-intercept of the fitted regression line, the `b` component +of the line equation. Returns a number. + +## Classification + +### .bayesian() + +Create a naïve bayesian classifier. + +### .train(item, category) + +Train the classifier to classify a certain item, given as an object with keys, +to be in a certain category, given as a string. + +### .score(item) + +Get the classifications of a certain item, given as an object of +`category -> score` mappings. + + var bayes = ss.bayesian(); + bayes.train({ species: 'Cat' }, 'animal'); + bayes.score({ species: 'Cat' }); + // { animal: 1 } + + + +--- + +## [Literate Source](http://macwright.org/simple-statistics/) + +## Usage + +To use it in browsers, grab [simple_statistics.js](https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js). +To use it in node, install it with [npm](https://npmjs.org/) or add it to your package.json. + + npm install simple-statistics + +To use it with [component](https://github.com/component/component), + + component install tmcw/simple-statistics + +To use it with [bower](http://bower.io/), + + bower install simple-statistics + +## Basic Descriptive Statistics + +```javascript +// Require simple statistics +var ss = require('simple-statistics'); + +// The input is a simple array +var list = [1, 2, 3]; + +// Many different descriptive statistics are supported +var sum = ss.sum(list), + mean = ss.mean(list), + min = ss.min(list), + geometric_mean = ss.geometric_mean(list), + max = ss.max(list), + quantile = ss.quantile(0.25); +``` + +## Linear Regression + +```javascript +// For a linear regression, it's a two-dimensional array +var data = [ [1, 2], [2, 3] ]; + +// simple-statistics can produce a linear regression and return +// a friendly javascript function for the line. +var line = ss.linear_regression() + .data(data) + .line(); + +// get a point along the line function +line(0); + +var line = ss.linear_regression() + +// Get the r-squared value of the line estimation +ss.r_squared(data, line); +``` + +### Bayesian Classifier + +```javascript +var bayes = ss.bayesian(); +bayes.train({ species: 'Cat' }, 'animal'); +bayes.score({ species: 'Cat' }); +// { animal: 1 } +``` + +### Mixin Style + +_This is **optional** and not used by default. You can opt-in to mixins +with `ss.mixin()`._ + +This mixes `simple-statistics` methods into the Array prototype - note that +[extending native objects](http://perfectionkills.com/extending-native-builtins/) is a +tricky move. + +This will _only work_ if `defineProperty` is available, which means modern browsers +and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception. + +```javascript +// mixin to Array class +ss.mixin(); + +// The input is a simple array +var list = [1, 2, 3]; + +// The same descriptive techniques as above, but in a simpler style +var sum = list.sum(), + mean = list.mean(), + min = list.min(), + max = list.max(), + quantile = list.quantile(0.25); +``` + +## Examples + +* [Linear regression with simple-statistics and d3js](http://bl.ocks.org/3931800) +* [Jenks Natural Breaks with a choropleth map with d3js](http://bl.ocks.org/tmcw/4969184) + +# Contributors + +* Tom MacWright +* [Matt Sacks](https://github.com/mattsacks) +* Doron Linder +* [Alexander Sicular](https://github.com/siculars) diff --git a/perf/simple-statistics/README.test.md b/perf/simple-statistics/README.test.md new file mode 100644 index 0000000..5734feb --- /dev/null +++ b/perf/simple-statistics/README.test.md @@ -0,0 +1,157 @@ +[![Build Status](https://secure.travis-ci.org/tmcw/simple-statistics.png?branch=master)](http://travis-ci.org/tmcw/simple-statistics) + +A JavaScript implementation of descriptive, regression, and inference statistics. + +Implemented in literate JavaScript with no dependencies, designed to work +in all modern browsers (including IE) as well as in node.js. + +# [API](API.md) + +[Full documentation](API.md) + +--- +``` + + Basic Array Operations + .mixin() + .mean(x) + .sum(x) + .variance(x) + .standard_deviation(x) + .median_absolute_deviation(x) + .median(x) + .geometric_mean(x) + .harmonic_mean(x) + .root_mean_square(x) + .min(x) + .max(x) + .t_test(sample, x) + .t_test_two_sample(sample_x, sample_y, difference) + .sample_variance(x) + .sample_covariance(x) + .sample_correlation(x) + .quantile(sample, p) + .iqr(sample) + .sample_skewness(sample) + .jenks(data, number_of_classes) + .r_squared(data, function) + .cumulative_std_normal_probability(z) + .z_score(x, mean, standard_deviation) + .standard_normal_table + Regression + .linear_regression() + .data([[1, 1], [2, 2]]) + .line() + .m() + .b() + Classification + .bayesian() + .train(item, category) + .score(item) +``` + +--- + +# [Literate Source](http://macwright.org/simple-statistics/) + +## Usage + +To use it in browsers, grab [simple_statistics.js](https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js). +To use it in node, install it with [npm](https://npmjs.org/) or add it to your package.json. + + npm install simple-statistics + +To use it with [component](https://github.com/component/component), + + component install tmcw/simple-statistics + +To use it with [bower](http://bower.io/), + + bower install simple-statistics + +## Basic Descriptive Statistics + +```javascript +// Require simple statistics +var ss = require('simple-statistics'); + +// The input is a simple array +var list = [1, 2, 3]; + +// Many different descriptive statistics are supported +var sum = ss.sum(list), + mean = ss.mean(list), + min = ss.min(list), + geometric_mean = ss.geometric_mean(list), + max = ss.max(list), + quantile = ss.quantile(0.25); +``` + +## Linear Regression + +```javascript +// For a linear regression, it's a two-dimensional array +var data = [ [1, 2], [2, 3] ]; + +// simple-statistics can produce a linear regression and return +// a friendly javascript function for the line. +var line = ss.linear_regression() + .data(data) + .line(); + +// get a point along the line function +line(0); + +var line = ss.linear_regression() + +// Get the r-squared value of the line estimation +ss.r_squared(data, line); +``` + +### Bayesian Classifier + +```javascript +var bayes = ss.bayesian(); +bayes.train({ species: 'Cat' }, 'animal'); +bayes.score({ species: 'Cat' }); +// { animal: 1 } +``` + +### Mixin Style + +_This is **optional** and not used by default. You can opt-in to mixins +with `ss.mixin()`._ + +This mixes `simple-statistics` methods into the Array prototype - note that +[extending native objects](http://perfectionkills.com/extending-built-in-native-objects-evil-or-not/) is a +tricky move. + +This will _only work_ if `defineProperty` is available, which means modern browsers +and nodejs - on IE8 and below, calling `ss.mixin()` will throw an exception. + +```javascript +// mixin to Array class +ss.mixin(); + +// The input is a simple array +var list = [1, 2, 3]; + +// The same descriptive techniques as above, but in a simpler style +var sum = list.sum(), + mean = list.mean(), + min = list.min(), + max = list.max(), + quantile = list.quantile(0.25); +``` + +## Examples + +* [Linear regression with simple-statistics and d3js](http://bl.ocks.org/3931800) +* [Jenks Natural Breaks with a choropleth map with d3js](http://bl.ocks.org/tmcw/4969184) + +# Contributors + +* Tom MacWright +* [Matt Sacks](https://github.com/mattsacks) +* Doron Linder +* [Alexander Sicular](https://github.com/siculars) diff --git a/perf/simple-statistics/SEEALSO.md b/perf/simple-statistics/SEEALSO.md new file mode 100644 index 0000000..c4699ba --- /dev/null +++ b/perf/simple-statistics/SEEALSO.md @@ -0,0 +1,23 @@ +## See Also + +* [stream-statistics](https://github.com/tmcw/stream-statistics), a sister project that implements + many of the same measures for streaming data - as online algorithms + +### Javascript + +* [science.js](https://github.com/jasondavies/science.js) +* [atoll.js](https://github.com/nsfmc/atoll.js) +* [descriptive_statistics](https://github.com/thirtysixthspan/descriptive_statistics) +* [jStat](http://www.jstat.org/) +* [classifier](https://github.com/harthur/classifier) is a naive bayesian classifier (though specialized for the words-spam case) +* [underscore.math](https://github.com/syntagmatic/underscore.math/blob/master/underscore.math.js) + +### Python + +* [Pandas](http://pandas.pydata.org/) +* [SciPy](http://www.scipy.org/) + +### Their Own Language + +* [Julia Language](http://julialang.org/) +* [R language](http://www.r-project.org/) diff --git a/perf/simple-statistics/api.js b/perf/simple-statistics/api.js new file mode 100644 index 0000000..531e3e1 --- /dev/null +++ b/perf/simple-statistics/api.js @@ -0,0 +1,20 @@ +var fs = require('fs'); + +var readme = fs.readFileSync('README.md', 'utf8') + .split('\n'); + +var a = true, b = true; + +fs.writeFileSync('README.md', readme.filter(function(f) { + if (f === '---') { + a = !a; + return true; + } + return a; +}).map(function(f) { + if (f === '---' && b) { + f = f + '\n\n' + fs.readFileSync('API.md', 'utf8') + '\n\n'; + b = false; + } + return f; +}).join('\n')); diff --git a/perf/simple-statistics/bower.json b/perf/simple-statistics/bower.json new file mode 100644 index 0000000..b4349a1 --- /dev/null +++ b/perf/simple-statistics/bower.json @@ -0,0 +1,11 @@ +{ + "name": "simple-statistics", + "version": "0.9.0", + "description": "Simple Statistics", + "repo": "tmcw/simple-statistics", + "keywords": [], + "license": "ISC", + "dependencies": {}, + "development": {}, + "main": "src/simple_statistics.js" +} \ No newline at end of file diff --git a/perf/simple-statistics/component.json b/perf/simple-statistics/component.json new file mode 100644 index 0000000..7973639 --- /dev/null +++ b/perf/simple-statistics/component.json @@ -0,0 +1,13 @@ +{ + "name": "simple-statistics", + "version": "0.9.0", + "description": "Simple Statistics", + "repo": "tmcw/simple-statistics", + "keywords": [], + "license": "ISC", + "dependencies": {}, + "development": {}, + "scripts": [ + "src/simple_statistics.js" + ] +} \ No newline at end of file diff --git a/perf/simple-statistics/docs/docco.css b/perf/simple-statistics/docs/docco.css new file mode 100644 index 0000000..a2899ac --- /dev/null +++ b/perf/simple-statistics/docs/docco.css @@ -0,0 +1,506 @@ +/*--------------------- Typography ----------------------------*/ + +@font-face { + font-family: 'aller-light'; + src: url('public/fonts/aller-light.eot'); + src: url('public/fonts/aller-light.eot?#iefix') format('embedded-opentype'), + url('public/fonts/aller-light.woff') format('woff'), + url('public/fonts/aller-light.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'aller-bold'; + src: url('public/fonts/aller-bold.eot'); + src: url('public/fonts/aller-bold.eot?#iefix') format('embedded-opentype'), + url('public/fonts/aller-bold.woff') format('woff'), + url('public/fonts/aller-bold.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +@font-face { + font-family: 'novecento-bold'; + src: url('public/fonts/novecento-bold.eot'); + src: url('public/fonts/novecento-bold.eot?#iefix') format('embedded-opentype'), + url('public/fonts/novecento-bold.woff') format('woff'), + url('public/fonts/novecento-bold.ttf') format('truetype'); + font-weight: normal; + font-style: normal; +} + +/*--------------------- Layout ----------------------------*/ +html { height: 100%; } +body { + font-family: "aller-light"; + font-size: 14px; + line-height: 18px; + color: #30404f; + margin: 0; padding: 0; + height:100%; +} +#container { min-height: 100%; } + +a { + color: #000; +} + +b, strong { + font-weight: normal; + font-family: "aller-bold"; +} + +p { + margin: 15px 0 0px; +} + .annotation ul, .annotation ol { + margin: 25px 0; + } + .annotation ul li, .annotation ol li { + font-size: 14px; + line-height: 18px; + margin: 10px 0; + } + +h1, h2, h3, h4, h5, h6 { + color: #112233; + line-height: 1em; + font-weight: normal; + font-family: "novecento-bold"; + text-transform: uppercase; + margin: 30px 0 15px 0; +} + +h1 { + margin-top: 40px; +} + +hr { + border: 0; + background: 1px #ddd; + height: 1px; + margin: 20px 0; +} + +pre, tt, code { + font-size: 12px; line-height: 16px; + font-family: Menlo, Monaco, Consolas, "Lucida Console", monospace; + margin: 0; padding: 0; +} + .annotation pre { + display: block; + margin: 0; + padding: 7px 10px; + background: #fcfcfc; + -moz-box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + -webkit-box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + box-shadow: inset 0 0 10px rgba(0,0,0,0.1); + overflow-x: auto; + } + .annotation pre code { + border: 0; + padding: 0; + background: transparent; + } + + +blockquote { + border-left: 5px solid #ccc; + margin: 0; + padding: 1px 0 1px 1em; +} + .sections blockquote p { + font-family: Menlo, Consolas, Monaco, monospace; + font-size: 12px; line-height: 16px; + color: #999; + margin: 10px 0 0; + white-space: pre-wrap; + } + +ul.sections { + list-style: none; + padding:0 0 5px 0;; + margin:0; +} + +/* + Force border-box so that % widths fit the parent + container without overlap because of margin/padding. + + More Info : http://www.quirksmode.org/css/box.html +*/ +ul.sections > li > div { + -moz-box-sizing: border-box; /* firefox */ + -ms-box-sizing: border-box; /* ie */ + -webkit-box-sizing: border-box; /* webkit */ + -khtml-box-sizing: border-box; /* konqueror */ + box-sizing: border-box; /* css3 */ +} + + +/*---------------------- Jump Page -----------------------------*/ +#jump_to, #jump_page { + margin: 0; + background: white; + -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; + -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; + font: 16px Arial; + cursor: pointer; + text-align: right; + list-style: none; +} + +#jump_to a { + text-decoration: none; +} + +#jump_to a.large { + display: none; +} +#jump_to a.small { + font-size: 22px; + font-weight: bold; + color: #676767; +} + +#jump_to, #jump_wrapper { + position: fixed; + right: 0; top: 0; + padding: 10px 15px; + margin:0; +} + +#jump_wrapper { + display: none; + padding:0; +} + +#jump_to:hover #jump_wrapper { + display: block; +} + +#jump_page { + padding: 5px 0 3px; + margin: 0 0 25px 25px; +} + +#jump_page .source { + display: block; + padding: 15px; + text-decoration: none; + border-top: 1px solid #eee; +} + +#jump_page .source:hover { + background: #f5f5ff; +} + +#jump_page .source:first-child { +} + +/*---------------------- Low resolutions (> 320px) ---------------------*/ +@media only screen and (min-width: 320px) { + .pilwrap { display: none; } + + ul.sections > li > div { + display: block; + padding:5px 10px 0 10px; + } + + ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol { + padding-left: 30px; + } + + ul.sections > li > div.content { + overflow-x:auto; + -webkit-box-shadow: inset 0 0 5px #e5e5ee; + box-shadow: inset 0 0 5px #e5e5ee; + border: 1px solid #dedede; + margin:5px 10px 5px 10px; + padding-bottom: 5px; + } + + ul.sections > li > div.annotation pre { + margin: 7px 0 7px; + padding-left: 15px; + } + + ul.sections > li > div.annotation p tt, .annotation code { + background: #f8f8ff; + border: 1px solid #dedede; + font-size: 12px; + padding: 0 0.2em; + } +} + +/*---------------------- (> 481px) ---------------------*/ +@media only screen and (min-width: 481px) { + #container { + position: relative; + } + body { + background-color: #F5F5FF; + font-size: 15px; + line-height: 21px; + } + pre, tt, code { + line-height: 18px; + } + p, ul, ol { + margin: 0 0 15px; + } + + + #jump_to { + padding: 5px 10px; + } + #jump_wrapper { + padding: 0; + } + #jump_to, #jump_page { + font: 10px Arial; + text-transform: uppercase; + } + #jump_page .source { + padding: 5px 10px; + } + #jump_to a.large { + display: inline-block; + } + #jump_to a.small { + display: none; + } + + + + #background { + position: absolute; + top: 0; bottom: 0; + width: 350px; + background: #fff; + border-right: 1px solid #e5e5ee; + z-index: -1; + } + + ul.sections > li > div.annotation ul, ul.sections > li > div.annotation ol { + padding-left: 40px; + } + + ul.sections > li { + white-space: nowrap; + } + + ul.sections > li > div { + display: inline-block; + } + + ul.sections > li > div.annotation { + max-width: 350px; + min-width: 350px; + min-height: 5px; + padding: 13px; + overflow-x: hidden; + white-space: normal; + vertical-align: top; + text-align: left; + } + ul.sections > li > div.annotation pre { + margin: 15px 0 15px; + padding-left: 15px; + } + + ul.sections > li > div.content { + padding: 13px; + vertical-align: top; + border: none; + -webkit-box-shadow: none; + box-shadow: none; + } + + .pilwrap { + position: relative; + display: inline; + } + + .pilcrow { + font: 12px Arial; + text-decoration: none; + color: #454545; + position: absolute; + top: 3px; left: -20px; + padding: 1px 2px; + opacity: 0; + -webkit-transition: opacity 0.2s linear; + } + .for-h1 .pilcrow { + top: 47px; + } + .for-h2 .pilcrow, .for-h3 .pilcrow, .for-h4 .pilcrow { + top: 35px; + } + + ul.sections > li > div.annotation:hover .pilcrow { + opacity: 1; + } +} + +/*---------------------- (> 1025px) ---------------------*/ +@media only screen and (min-width: 1025px) { + + body { + font-size: 16px; + line-height: 24px; + } + + #background { + width: 525px; + } + ul.sections > li > div.annotation { + max-width: 525px; + min-width: 525px; + padding: 10px 25px 1px 50px; + } + ul.sections > li > div.content { + padding: 9px 15px 16px 25px; + } +} + +/*---------------------- Syntax Highlighting -----------------------------*/ + +td.linenos { background-color: #f0f0f0; padding-right: 10px; } +span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } +/* + +github.com style (c) Vasily Polovnyov/* global module */
A simple, literate statistics system. The code below uses the
+Javascript module pattern,
+eventually assigning simple-statistics
to ss
in browsers or the
+exports
object for node.js
(function() {
+ var ss = {};
+
+ if (typeof module !== 'undefined') {
module.exports = ss;
+ } else {
Otherwise, in a browser, we assign ss
to the window object,
+so you can simply refer to it as ss
.
this.ss = ss;
+ }
Simple linear regression +is a simple way to find a fitted line +between a set of coordinates.
+ + function linear_regression() {
+ var linreg = {},
+ data = [];
Assign data to the model. Data is assumed to be an array.
+ + linreg.data = function(x) {
+ if (!arguments.length) return data;
+ data = x.slice();
+ return linreg;
+ };
Calculate the slope and y-intercept of the regression line +by calculating the least sum of squares
+ + linreg.mb = function() {
+ var m, b;
Store data length in a local variable to reduce +repeated object property lookups
+ + var data_length = data.length;
if there’s only one point, arbitrarily choose a slope of 0 +and a y-intercept of whatever the y of the initial point is
+ + if (data_length === 1) {
+ m = 0;
+ b = data[0][1];
+ } else {
Initialize our sums and scope the m
and b
+variables that define the line.
var sum_x = 0, sum_y = 0,
+ sum_xx = 0, sum_xy = 0;
Use local variables to grab point values +with minimal object property lookups
+ + var point, x, y;
Gather the sum of all x values, the sum of all +y values, and the sum of x^2 and (x*y) for each +value.
+In math notation, these would be SS_x, SS_y, SS_xx, and SS_xy
+ + for (var i = 0; i < data_length; i++) {
+ point = data[i];
+ x = point[0];
+ y = point[1];
+
+ sum_x += x;
+ sum_y += y;
+
+ sum_xx += x * x;
+ sum_xy += x * y;
+ }
m
is the slope of the regression line
m = ((data_length * sum_xy) - (sum_x * sum_y)) /
+ ((data_length * sum_xx) - (sum_x * sum_x));
b
is the y-intercept of the line.
b = (sum_y / data_length) - ((m * sum_x) / data_length);
+ }
Return both values as an object.
+ + return { m: m, b: b };
+ };
a shortcut for simply getting the slope of the regression line
+ + linreg.m = function() {
+ return linreg.mb().m;
+ };
a shortcut for simply getting the y-intercept of the regression +line.
+ + linreg.b = function() {
+ return linreg.mb().b;
+ };
This is called after .data()
and returns the
+equation y = f(x)
which gives the position
+of the regression line at each point in x
.
linreg.line = function() {
Get the slope, m
, and y-intercept, b
, of the line.
var mb = linreg.mb(),
+ m = mb.m,
+ b = mb.b;
Return a function that computes a y
value for each
+x value it is given, based on the values of b
and a
+that we just computed.
return function(x) {
+ return b + (m * x);
+ };
+ };
+
+ return linreg;
+ }
The r-squared value of data compared with a function f
+is the sum of the squared differences between the prediction
+and the actual value.
function r_squared(data, f) {
+ if (data.length < 2) return 1;
Compute the average y value for the actual +data set in order to compute the +total sum of squares
+ + var sum = 0, average;
+ for (var i = 0; i < data.length; i++) {
+ sum += data[i][1];
+ }
+ average = sum / data.length;
Compute the total sum of squares - the +squared difference between each point +and the average of all points.
+ + var sum_of_squares = 0;
+ for (var j = 0; j < data.length; j++) {
+ sum_of_squares += Math.pow(average - data[j][1], 2);
+ }
Finally estimate the error: the squared +difference between the estimate and the actual data +value at each point.
+ + var err = 0;
+ for (var k = 0; k < data.length; k++) {
+ err += Math.pow(data[k][1] - f(data[k][0]), 2);
+ }
As the error grows larger, its ratio to the +sum of squares increases and the r squared +value grows lower.
+ + return 1 - (err / sum_of_squares);
+ }
This is a naïve bayesian classifier that takes +singly-nested objects.
+ + function bayesian() {
The bayes_model
object is what will be exposed
+by this closure, with all of its extended methods, and will
+have access to all scope variables, like total_count
.
var bayes_model = {},
The number of items that are currently +classified in the model
+ + total_count = 0,
Every item classified in the model
+ + data = {};
Train the classifier with a new item, which has a single +dimension of Javascript literal keys and values.
+ + bayes_model.train = function(item, category) {
If the data object doesn’t have any values +for this category, create a new object for it.
+ + if (!data[category]) data[category] = {};
Iterate through each key in the item.
+ + for (var k in item) {
+ var v = item[k];
Initialize the nested object data[category][k][item[k]]
+with an object of keys that equal 0.
if (data[category][k] === undefined) data[category][k] = {};
+ if (data[category][k][v] === undefined) data[category][k][v] = 0;
And increment the key for this key/value combination.
+ + data[category][k][item[k]]++;
+ }
Increment the number of items classified
+ + total_count++;
+ };
Generate a score of how well this item matches all +possible categories based on its attributes
+ + bayes_model.score = function(item) {
Initialize an empty array of odds per category.
+ + var odds = {}, category;
Iterate through each key in the item,
+then iterate through each category that has been used
+in previous calls to .train()
for (var k in item) {
+ var v = item[k];
+ for (category in data) {
Create an empty object for storing key - value combinations +for this category.
+ + if (odds[category] === undefined) odds[category] = {};
If this item doesn’t even have a property, it counts for nothing, +but if it does have the property that we’re looking for from +the item to categorize, it counts based on how popular it is +versus the whole population.
+ + if (data[category][k]) {
+ odds[category][k + '_' + v] = (data[category][k][v] || 0) / total_count;
+ } else {
+ odds[category][k + '_' + v] = 0;
+ }
+ }
+ }
Set up a new object that will contain sums of these odds by category
+ + var odds_sums = {};
+
+ for (category in odds) {
Tally all of the odds for each category-combination pair - +the non-existence of a category does not add anything to the +score.
+ + for (var combination in odds[category]) {
+ if (odds_sums[category] === undefined) odds_sums[category] = 0;
+ odds_sums[category] += odds[category][combination];
+ }
+ }
+
+ return odds_sums;
+ };
Return the completed model.
+ + return bayes_model;
+ }
is simply the result of adding all numbers +together, starting from zero.
+This runs on O(n)
, linear time in respect to the array
function sum(x) {
+ var value = 0;
+ for (var i = 0; i < x.length; i++) {
+ value += x[i];
+ }
+ return value;
+ }
is the sum over the number of values
+This runs on O(n)
, linear time in respect to the array
function mean(x) {
The mean of no numbers is null
+ + if (x.length === 0) return null;
+
+ return sum(x) / x.length;
+ }
a mean function that is more useful for numbers in different +ranges.
+this is the nth root of the input numbers multiplied by each other
+This runs on O(n)
, linear time in respect to the array
function geometric_mean(x) {
The mean of no numbers is null
+ + if (x.length === 0) return null;
the starting value.
+ + var value = 1;
+
+ for (var i = 0; i < x.length; i++) {
the geometric mean is only valid for positive numbers
+ + if (x[i] <= 0) return null;
repeatedly multiply the value by each number
+ + value *= x[i];
+ }
+
+ return Math.pow(value, 1 / x.length);
+ }
a mean function typically used to find the average of rates
+this is the reciprocal of the arithmetic mean of the reciprocals +of the input numbers
+This runs on O(n)
, linear time in respect to the array
function harmonic_mean(x) {
The mean of no numbers is null
+ + if (x.length === 0) return null;
+
+ var reciprocal_sum = 0;
+
+ for (var i = 0; i < x.length; i++) {
the harmonic mean is only valid for positive numbers
+ + if (x[i] <= 0) return null;
+
+ reciprocal_sum += 1 / x[i];
+ }
divide n by the the reciprocal sum
+ + return x.length / reciprocal_sum;
+ }
This is simply the minimum number in the set.
+This runs on O(n)
, linear time in respect to the array
function min(x) {
+ var value;
+ for (var i = 0; i < x.length; i++) {
On the first iteration of this loop, min is +undefined and is thus made the minimum element in the array
+ + if (x[i] < value || value === undefined) value = x[i];
+ }
+ return value;
+ }
This is simply the maximum number in the set.
+This runs on O(n)
, linear time in respect to the array
function max(x) {
+ var value;
+ for (var i = 0; i < x.length; i++) {
On the first iteration of this loop, max is +undefined and is thus made the maximum element in the array
+ + if (x[i] > value || value === undefined) value = x[i];
+ }
+ return value;
+ }
function variance(x) {
The variance of no numbers is null
+ + if (x.length === 0) return null;
+
+ var mean_value = mean(x),
+ deviations = [];
Make a list of squared deviations from the mean.
+ + for (var i = 0; i < x.length; i++) {
+ deviations.push(Math.pow(x[i] - mean_value, 2));
+ }
Find the mean value of that list
+ + return mean(deviations);
+ }
function standard_deviation(x) {
The standard deviation of no numbers is null
+ + if (x.length === 0) return null;
+
+ return Math.sqrt(variance(x));
+ }
The sum of deviations to the Nth power. +When n=2 it’s the sum of squared deviations. +When n=3 it’s the sum of cubed deviations.
+depends on mean()
function sum_nth_power_deviations(x, n) {
+ var mean_value = mean(x),
+ sum = 0;
+
+ for (var i = 0; i < x.length; i++) {
+ sum += Math.pow(x[i] - mean_value, n);
+ }
+
+ return sum;
+ }
is the sum of squared deviations from the mean
+depends on sum_nth_power_deviations
function sample_variance(x) {
The variance of no numbers is null
+ + if (x.length <= 1) return null;
+
+ var sum_squared_deviations_value = sum_nth_power_deviations(x, 2);
Find the mean value of that list
+ + return sum_squared_deviations_value / (x.length - 1);
+ }
is just the square root of the variance.
+depends on sample_variance()
function sample_standard_deviation(x) {
The standard deviation of no numbers is null
+ + if (x.length <= 1) return null;
+
+ return Math.sqrt(sample_variance(x));
+ }
sample covariance of two datasets: +how much do the two datasets move together? +x and y are two datasets, represented as arrays of numbers.
+depends on mean()
function sample_covariance(x, y) {
The two datasets must have the same length which must be more than 1
+ + if (x.length <= 1 || x.length != y.length){
+ return null;
+ }
determine the mean of each dataset so that we can judge each +value of the dataset fairly as the difference from the mean. this +way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance +does not suffer because of the difference in absolute values
+ + var xmean = mean(x),
+ ymean = mean(y),
+ sum = 0;
for each pair of values, the covariance increases when their +difference from the mean is associated - if both are well above +or if both are well below +the mean, the covariance increases significantly.
+ + for (var i = 0; i < x.length; i++){
+ sum += (x[i] - xmean) * (y[i] - ymean);
+ }
the covariance is weighted by the length of the datasets.
+ + return sum / (x.length - 1);
+ }
Gets a measure of how correlated two datasets are, between -1 and 1
+depends on sample_standard_deviation()
and sample_covariance()
function sample_correlation(x, y) {
+ var cov = sample_covariance(x, y),
+ xstd = sample_standard_deviation(x),
+ ystd = sample_standard_deviation(y);
+
+ if (cov === null || xstd === null || ystd === null) {
+ return null;
+ }
+
+ return cov / xstd / ystd;
+ }
The middle number of a list. This is often a good indicator of ‘the middle’
+when there are outliers that skew the mean()
value.
function median(x) {
The median of an empty list is null
+ + if (x.length === 0) return null;
Sorting the array makes it easy to find the center, but
+use .slice()
to ensure the original array x
is not modified
var sorted = x.slice().sort(function (a, b) { return a - b; });
If the length of the list is odd, it’s the central number
+ + if (sorted.length % 2 === 1) {
+ return sorted[(sorted.length - 1) / 2];
Otherwise, the median is the average of the two numbers +at the center of the list
+ + } else {
+ var a = sorted[(sorted.length / 2) - 1];
+ var b = sorted[(sorted.length / 2)];
+ return (a + b) / 2;
+ }
+ }
The mode is the number that appears in a list the highest number of times. +There can be multiple modes in a list: in the event of a tie, this +algorithm will return the most recently seen mode.
+This implementation is inspired by science.js
+This runs on O(n)
, linear time in respect to the array
function mode(x) {
Handle edge cases: +The median of an empty list is null
+ + if (x.length === 0) return null;
+ else if (x.length === 1) return x[0];
Sorting the array lets us iterate through it below and be sure +that every time we see a new number it’s new and we’ll never +see the same number twice
+ + var sorted = x.slice().sort(function (a, b) { return a - b; });
This assumes it is dealing with an array of size > 1, since size +0 and 1 are handled immediately. Hence it starts at index 1 in the +array.
+ + var last = sorted[0],
store the mode as we find new modes
+ + value,
store how many times we’ve seen the mode
+ + max_seen = 0,
how many times the current candidate for the mode +has been seen
+ + seen_this = 1;
end at sorted.length + 1 to fix the case in which the mode is +the highest number that occurs in the sequence. the last iteration +compares sorted[i], which is undefined, to the highest number +in the series
+ + for (var i = 1; i < sorted.length + 1; i++) {
we’re seeing a new number pass by
+ + if (sorted[i] !== last) {
the last number is the new mode since we saw it more +often than the old one
+ + if (seen_this > max_seen) {
+ max_seen = seen_this;
+ value = last;
+ }
+ seen_this = 1;
+ last = sorted[i];
if this isn’t a new number, it’s one more occurrence of +the potential mode
+ + } else { seen_this++; }
+ }
+ return value;
+ }
This is to compute a one-sample t-test, comparing the mean +of a sample to a known value, x.
+in this case, we’re trying to determine whether the
+population mean is equal to the value that we know, which is x
+here. usually the results here are used to look up a
+p-value, which, for
+a certain level of significance, will let you determine that the
+null hypothesis can or cannot be rejected.
Depends on standard_deviation()
and mean()
function t_test(sample, x) {
The mean of the sample
+ + var sample_mean = mean(sample);
The standard deviation of the sample
+ + var sd = standard_deviation(sample);
Square root the length of the sample
+ + var rootN = Math.sqrt(sample.length);
Compute the known value against the sample, +returning the t value
+ + return (sample_mean - x) / (sd / rootN);
+ }
This is to compute two sample t-test.
+Tests whether “mean(X)-mean(Y) = difference”, (
+in the most common case, we often have difference == 0
to test if two samples
+are likely to be taken from populations with the same mean value) with
+no prior knowledge on standard deviations of both samples
+other than the fact that they have the same standard deviation.
Usually the results here are used to look up a +p-value, which, for +a certain level of significance, will let you determine that the +null hypothesis can or cannot be rejected.
+diff
can be omitted if it equals 0.
This is used to confirm or deny
+a null hypothesis that the two populations that have been sampled into
+sample_x
and sample_y
are equal to each other.
Depends on sample_variance()
and mean()
function t_test_two_sample(sample_x, sample_y, difference) {
+ var n = sample_x.length,
+ m = sample_y.length;
If either sample doesn’t actually have any values, we can’t
+compute this at all, so we return null
.
if (!n || !m) return null ;
default difference (mu) is zero
+ + if (!difference) difference = 0;
+
+ var meanX = mean(sample_x),
+ meanY = mean(sample_y);
+
+ var weightedVariance = ((n - 1) * sample_variance(sample_x) +
+ (m - 1) * sample_variance(sample_y)) / (n + m - 2);
+
+ return (meanX - meanY - difference) /
+ Math.sqrt(weightedVariance * (1 / n + 1 / m));
+ }
Split an array into chunks of a specified size. This function +has the same behavior as PHP’s array_chunk +function, and thus will insert smaller-sized chunks at the end if +the input size is not divisible by the chunk size.
+sample
is expected to be an array, and chunkSize
a number.
+The sample
array can contain any kind of data.
function chunk(sample, chunkSize) {
a list of result chunks, as arrays in an array
+ + var output = [];
chunkSize
must be zero or higher - otherwise the loop below,
+in which we call start += chunkSize
, will loop infinitely.
+So, we’ll detect and return null in that case to indicate
+invalid input.
if (chunkSize <= 0) {
+ return null;
+ }
start
is the index at which .slice
will start selecting
+new array elements
for (var start = 0; start < sample.length; start += chunkSize) {
for each chunk, slice that part of the array and add it
+to the output. The .slice
function does not change
+the original array.
output.push(sample.slice(start, start + chunkSize));
+ }
+ return output;
+ }
A Fisher-Yates shuffle +in-place - which means that it will change the order of the original +array by reference.
+ + function shuffle_in_place(sample, randomSource) {
a custom random number source can be provided if you want to use +a fixed seed or another random number generator, like +random-js
+ + randomSource = randomSource || Math.random;
store the current length of the sample to determine +when no elements remain to shuffle.
+ + var length = sample.length;
temporary is used to hold an item when it is being +swapped between indices.
+ + var temporary;
The index to swap at each stage.
+ + var index;
While there are still items to shuffle
+ + while (length > 0) {
chose a random index within the subset of the array +that is not yet shuffled
+ + index = Math.floor(randomSource() * length--);
store the value that we’ll move temporarily
+ + temporary = sample[length];
swap the value at sample[length]
with sample[index]
sample[length] = sample[index];
+ sample[index] = temporary;
+ }
+
+ return sample;
+ }
A Fisher-Yates shuffle +is a fast way to create a random permutation of a finite set.
+ + function shuffle(sample, randomSource) {
slice the original array so that it is not modified
+ + sample = sample.slice();
and then shuffle that shallow-copied array, in place
+ + return shuffle_in_place(sample.slice(), randomSource);
+ }
function sample(array, n, randomSource) {
shuffle the original array using a fisher-yates shuffle
+ + var shuffled = shuffle(array, randomSource);
and then return a subset of it - the first n
elements.
return shuffled.slice(0, n);
+ }
This is a population quantile, since we assume to know the entire +dataset in this library. Thus I’m trying to follow the +Quantiles of a Population +algorithm from wikipedia.
+Sample is a one-dimensional array of numbers, +and p is either a decimal number from 0 to 1 or an array of decimal +numbers from 0 to 1. +In terms of a k/q quantile, p = k/q - it’s just dealing with fractions or dealing +with decimal values. +When p is an array, the result of the function is also an array containing the appropriate +quantiles in input order
+ + function quantile(sample, p) {
We can’t derive quantiles from an empty list
+ + if (sample.length === 0) return null;
Sort a copy of the array. We’ll need a sorted array to index +the values in sorted order.
+ + var sorted = sample.slice().sort(function (a, b) { return a - b; });
+
+ if (p.length) {
Initialize the result array
+ + var results = [];
For each requested quantile
+ + for (var i = 0; i < p.length; i++) {
+ results[i] = quantile_sorted(sorted, p[i]);
+ }
+ return results;
+ } else {
+ return quantile_sorted(sorted, p);
+ }
+ }
This is the internal implementation of quantiles: when you know +that the order is sorted, you don’t need to re-sort it, and the computations +are much faster.
+ + function quantile_sorted(sample, p) {
+ var idx = (sample.length) * p;
+ if (p < 0 || p > 1) {
+ return null;
+ } else if (p === 1) {
If p is 1, directly return the last element
+ + return sample[sample.length - 1];
+ } else if (p === 0) {
If p is 0, directly return the first element
+ + return sample[0];
+ } else if (idx % 1 !== 0) {
If p is not integer, return the next element in array
+ + return sample[Math.ceil(idx) - 1];
+ } else if (sample.length % 2 === 0) {
If the list has even-length, we’ll take the average of this number +and the next value, if there is one
+ + return (sample[idx - 1] + sample[idx]) / 2;
+ } else {
Finally, in the simple case of an integer value +with an odd-length list, return the sample value at the index.
+ + return sample[idx];
+ }
+ }
A measure of statistical dispersion, or how scattered, spread, or +concentrated a distribution is. It’s computed as the difference between +the third quartile and first quartile.
+ + function iqr(sample) {
We can’t derive quantiles from an empty list
+ + if (sample.length === 0) return null;
Interquartile range is the span between the upper quartile,
+at 0.75
, and lower quartile, 0.25
return quantile(sample, 0.75) - quantile(sample, 0.25);
+ }
The Median Absolute Deviation (MAD) is a robust measure of statistical +dispersion. It is more resilient to outliers than the standard deviation.
+ + function mad(x) {
The mad of nothing is null
+ + if (!x || x.length === 0) return null;
+
+ var median_value = median(x),
+ median_absolute_deviations = [];
Make a list of absolute deviations from the median
+ + for (var i = 0; i < x.length; i++) {
+ median_absolute_deviations.push(Math.abs(x[i] - median_value));
+ }
Find the median value of that list
+ + return median(median_absolute_deviations);
+ }
Compute the matrices required for Jenks breaks. These matrices
+can be used for any classing of data with classes <= n_classes
function jenksMatrices(data, n_classes) {
in the original implementation, these matrices are referred to
+as LC
and OP
var lower_class_limits = [],
+ variance_combinations = [],
loop counters
+ + i, j,
the variance, as computed at each step in the calculation
+ + variance = 0;
Initialize and fill each matrix with zeroes
+ + for (i = 0; i < data.length + 1; i++) {
+ var tmp1 = [], tmp2 = [];
despite these arrays having the same values, we need +to keep them separate so that changing one does not change +the other
+ + for (j = 0; j < n_classes + 1; j++) {
+ tmp1.push(0);
+ tmp2.push(0);
+ }
+ lower_class_limits.push(tmp1);
+ variance_combinations.push(tmp2);
+ }
+
+ for (i = 1; i < n_classes + 1; i++) {
+ lower_class_limits[1][i] = 1;
+ variance_combinations[1][i] = 0;
in the original implementation, 9999999 is used but
+since Javascript has Infinity
, we use that.
for (j = 2; j < data.length + 1; j++) {
+ variance_combinations[j][i] = Infinity;
+ }
+ }
+
+ for (var l = 2; l < data.length + 1; l++) {
SZ
originally. this is the sum of the values seen thus
+far when calculating variance.
var sum = 0,
ZSQ
originally. the sum of squares of values seen
+thus far
sum_squares = 0,
WT
originally. This is the number of
w = 0,
IV
originally
i4 = 0;
in several instances, you could say Math.pow(x, 2)
+instead of x * x
, but this is slower in some browsers
+introduces an unnecessary concept.
for (var m = 1; m < l + 1; m++) {
III
originally
var lower_class_limit = l - m + 1,
+ val = data[lower_class_limit - 1];
here we’re estimating variance for each potential classing
+of the data, for each potential number of classes. w
+is the number of data points considered so far.
w++;
increase the current sum and sum-of-squares
+ + sum += val;
+ sum_squares += val * val;
the variance at this point in the sequence is the difference +between the sum of squares and the total x 2, over the number +of samples.
+ + variance = sum_squares - (sum * sum) / w;
+
+ i4 = lower_class_limit - 1;
+
+ if (i4 !== 0) {
+ for (j = 2; j < n_classes + 1; j++) {
if adding this element to an existing class
+will increase its variance beyond the limit, break
+the class at this point, setting the lower_class_limit
+at this point.
if (variance_combinations[l][j] >=
+ (variance + variance_combinations[i4][j - 1])) {
+ lower_class_limits[l][j] = lower_class_limit;
+ variance_combinations[l][j] = variance +
+ variance_combinations[i4][j - 1];
+ }
+ }
+ }
+ }
+
+ lower_class_limits[l][1] = 1;
+ variance_combinations[l][1] = variance;
+ }
return the two matrices. for just providing breaks, only
+lower_class_limits
is needed, but variances can be useful to
+evaluate goodness of fit.
return {
+ lower_class_limits: lower_class_limits,
+ variance_combinations: variance_combinations
+ };
+ }
the second part of the jenks recipe: take the calculated matrices +and derive an array of n breaks.
+ + function jenksBreaks(data, lower_class_limits, n_classes) {
+
+ var k = data.length - 1,
+ kclass = [],
+ countNum = n_classes;
the calculation of classes will never include the upper and +lower bounds, so we need to explicitly set them
+ + kclass[n_classes] = data[data.length - 1];
+ kclass[0] = data[0];
the lower_class_limits matrix is used as indices into itself
+here: the k
variable is reused in each iteration.
while (countNum > 1) {
+ kclass[countNum - 1] = data[lower_class_limits[k][countNum] - 2];
+ k = lower_class_limits[k][countNum] - 1;
+ countNum--;
+ }
+
+ return kclass;
+ }
Implementations: 1 (python), +2 (buggy), +3 (works)
+Depends on jenksBreaks()
and jenksMatrices()
function jenks(data, n_classes) {
+
+ if (n_classes > data.length) return null;
sort data in numerical order, since this is expected +by the matrices function
+ + data = data.slice().sort(function (a, b) { return a - b; });
get our basic matrices
+ + var matrices = jenksMatrices(data, n_classes),
we only need lower class limits here
+ + lower_class_limits = matrices.lower_class_limits;
extract n_classes out of the computed matrices
+ + return jenksBreaks(data, lower_class_limits, n_classes);
+
+ }
A measure of the extent to which a probability distribution of a +real-valued random variable “leans” to one side of the mean. +The skewness value can be positive or negative, or even undefined.
+Implementation is based on the adjusted Fisher-Pearson standardized +moment coefficient, which is the version found in Excel and several +statistical packages including Minitab, SAS and SPSS.
+Depends on sum_nth_power_deviations()
and sample_standard_deviation
function sample_skewness(x) {
The skewness of less than three arguments is null
+ + if (x.length < 3) return null;
+
+ var n = x.length,
+ cubed_s = Math.pow(sample_standard_deviation(x), 3),
+ sum_cubed_deviations = sum_nth_power_deviations(x, 3);
+
+ return n * sum_cubed_deviations / ((n - 1) * (n - 2) * cubed_s);
+ }
A standard normal table, also called the unit normal table or Z table, +is a mathematical table for the values of Φ (phi), which are the values of +the cumulative distribution function of the normal distribution. +It is used to find the probability that a statistic is observed below, +above, or between values on the standard normal distribution, and by +extension, any normal distribution.
+The probabilities are taken from http://en.wikipedia.org/wiki/Standard_normal_table +The table used is the cumulative, and not cumulative from 0 to mean +(even though the latter has 5 digits precision, instead of 4).
+ + var standard_normal_table = [
+ /* z 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 */
+ /* 0.0 */
+ 0.5000, 0.5040, 0.5080, 0.5120, 0.5160, 0.5199, 0.5239, 0.5279, 0.5319, 0.5359,
+ /* 0.1 */
+ 0.5398, 0.5438, 0.5478, 0.5517, 0.5557, 0.5596, 0.5636, 0.5675, 0.5714, 0.5753,
+ /* 0.2 */
+ 0.5793, 0.5832, 0.5871, 0.5910, 0.5948, 0.5987, 0.6026, 0.6064, 0.6103, 0.6141,
+ /* 0.3 */
+ 0.6179, 0.6217, 0.6255, 0.6293, 0.6331, 0.6368, 0.6406, 0.6443, 0.6480, 0.6517,
+ /* 0.4 */
+ 0.6554, 0.6591, 0.6628, 0.6664, 0.6700, 0.6736, 0.6772, 0.6808, 0.6844, 0.6879,
+ /* 0.5 */
+ 0.6915, 0.6950, 0.6985, 0.7019, 0.7054, 0.7088, 0.7123, 0.7157, 0.7190, 0.7224,
+ /* 0.6 */
+ 0.7257, 0.7291, 0.7324, 0.7357, 0.7389, 0.7422, 0.7454, 0.7486, 0.7517, 0.7549,
+ /* 0.7 */
+ 0.7580, 0.7611, 0.7642, 0.7673, 0.7704, 0.7734, 0.7764, 0.7794, 0.7823, 0.7852,
+ /* 0.8 */
+ 0.7881, 0.7910, 0.7939, 0.7967, 0.7995, 0.8023, 0.8051, 0.8078, 0.8106, 0.8133,
+ /* 0.9 */
+ 0.8159, 0.8186, 0.8212, 0.8238, 0.8264, 0.8289, 0.8315, 0.8340, 0.8365, 0.8389,
+ /* 1.0 */
+ 0.8413, 0.8438, 0.8461, 0.8485, 0.8508, 0.8531, 0.8554, 0.8577, 0.8599, 0.8621,
+ /* 1.1 */
+ 0.8643, 0.8665, 0.8686, 0.8708, 0.8729, 0.8749, 0.8770, 0.8790, 0.8810, 0.8830,
+ /* 1.2 */
+ 0.8849, 0.8869, 0.8888, 0.8907, 0.8925, 0.8944, 0.8962, 0.8980, 0.8997, 0.9015,
+ /* 1.3 */
+ 0.9032, 0.9049, 0.9066, 0.9082, 0.9099, 0.9115, 0.9131, 0.9147, 0.9162, 0.9177,
+ /* 1.4 */
+ 0.9192, 0.9207, 0.9222, 0.9236, 0.9251, 0.9265, 0.9279, 0.9292, 0.9306, 0.9319,
+ /* 1.5 */
+ 0.9332, 0.9345, 0.9357, 0.9370, 0.9382, 0.9394, 0.9406, 0.9418, 0.9429, 0.9441,
+ /* 1.6 */
+ 0.9452, 0.9463, 0.9474, 0.9484, 0.9495, 0.9505, 0.9515, 0.9525, 0.9535, 0.9545,
+ /* 1.7 */
+ 0.9554, 0.9564, 0.9573, 0.9582, 0.9591, 0.9599, 0.9608, 0.9616, 0.9625, 0.9633,
+ /* 1.8 */
+ 0.9641, 0.9649, 0.9656, 0.9664, 0.9671, 0.9678, 0.9686, 0.9693, 0.9699, 0.9706,
+ /* 1.9 */
+ 0.9713, 0.9719, 0.9726, 0.9732, 0.9738, 0.9744, 0.9750, 0.9756, 0.9761, 0.9767,
+ /* 2.0 */
+ 0.9772, 0.9778, 0.9783, 0.9788, 0.9793, 0.9798, 0.9803, 0.9808, 0.9812, 0.9817,
+ /* 2.1 */
+ 0.9821, 0.9826, 0.9830, 0.9834, 0.9838, 0.9842, 0.9846, 0.9850, 0.9854, 0.9857,
+ /* 2.2 */
+ 0.9861, 0.9864, 0.9868, 0.9871, 0.9875, 0.9878, 0.9881, 0.9884, 0.9887, 0.9890,
+ /* 2.3 */
+ 0.9893, 0.9896, 0.9898, 0.9901, 0.9904, 0.9906, 0.9909, 0.9911, 0.9913, 0.9916,
+ /* 2.4 */
+ 0.9918, 0.9920, 0.9922, 0.9925, 0.9927, 0.9929, 0.9931, 0.9932, 0.9934, 0.9936,
+ /* 2.5 */
+ 0.9938, 0.9940, 0.9941, 0.9943, 0.9945, 0.9946, 0.9948, 0.9949, 0.9951, 0.9952,
+ /* 2.6 */
+ 0.9953, 0.9955, 0.9956, 0.9957, 0.9959, 0.9960, 0.9961, 0.9962, 0.9963, 0.9964,
+ /* 2.7 */
+ 0.9965, 0.9966, 0.9967, 0.9968, 0.9969, 0.9970, 0.9971, 0.9972, 0.9973, 0.9974,
+ /* 2.8 */
+ 0.9974, 0.9975, 0.9976, 0.9977, 0.9977, 0.9978, 0.9979, 0.9979, 0.9980, 0.9981,
+ /* 2.9 */
+ 0.9981, 0.9982, 0.9982, 0.9983, 0.9984, 0.9984, 0.9985, 0.9985, 0.9986, 0.9986,
+ /* 3.0 */
+ 0.9987, 0.9987, 0.9987, 0.9988, 0.9988, 0.9989, 0.9989, 0.9989, 0.9990, 0.9990
+ ];
Since probability tables cannot be +printed for every normal distribution, as there are an infinite variety +of normal distributions, it is common practice to convert a normal to a +standard normal and then use the standard normal table to find probabilities
+ + function cumulative_std_normal_probability(z) {
Calculate the position of this value.
+ + var absZ = Math.abs(z),
Each row begins with a different +significant digit: 0.5, 0.6, 0.7, and so on. So the row is simply +this value’s significant digit: 0.567 will be in row 0, so row=0, +0.643 will be in row 1, so row=10.
+ + row = Math.floor(absZ * 10),
+ column = 10 * (Math.floor(absZ * 100) / 10 - Math.floor(absZ * 100 / 10)),
+ index = Math.min((row * 10) + column, standard_normal_table.length - 1);
The index we calculate must be in the table as a positive value, +but we still pay attention to whether the input is positive +or negative, and flip the output value as a last step.
+ + if (z >= 0) {
+ return standard_normal_table[index];
+ } else {
due to floating-point arithmetic, values in the table with +4 significant figures can nevertheless end up as repeating +fractions when they’re computed here.
+ + return +(1 - standard_normal_table[index]).toFixed(4);
+ }
+ }
The standard score is the number of standard deviations an observation +or datum is above or below the mean. Thus, a positive standard score +represents a datum above the mean, while a negative standard score +represents a datum below the mean. It is a dimensionless quantity +obtained by subtracting the population mean from an individual raw +score and then dividing the difference by the population standard +deviation.
+The z-score is only defined if one knows the population parameters; +if one only has a sample set, then the analogous computation with +sample mean and sample standard deviation yields the +Student’s t-statistic.
+ + function z_score(x, mean, standard_deviation) {
+ return (x - mean) / standard_deviation;
+ }
We use ε
, epsilon, as a stopping criterion when we want to iterate
+until we’re “close enough”.
var epsilon = 0.0001;
A factorial, usually written n!, is the product of all positive +integers less than or equal to n. Often factorial is implemented +recursively, but this iterative approach is significantly faster +and simpler.
+ + function factorial(n) {
factorial is mathematically undefined for negative numbers
+ + if (n < 0 ) { return null; }
typically you’ll expand the factorial function going down, like +5! = 5 4 3 2 1. This is going in the opposite direction, +counting from 2 up to the number in question, and since anything +multiplied by 1 is itself, the loop only needs to start at 2.
+ + var accumulator = 1;
+ for (var i = 2; i <= n; i++) {
for each number up to and including the number n
, multiply
+the accumulator my that number.
accumulator *= i;
+ }
+ return accumulator;
+ }
The Bernoulli distribution
+is the probability discrete
+distribution of a random variable which takes value 1 with success
+probability p
and value 0 with failure
+probability q
= 1 - p
. It can be used, for example, to represent the
+toss of a coin, where “1” is defined to mean “heads” and “0” is defined
+to mean “tails” (or vice versa). It is
+a special case of a Binomial Distribution
+where n
= 1.
function bernoulli_distribution(p) {
Check that p
is a valid probability (0 ≤ p ≤ 1)
if (p < 0 || p > 1 ) { return null; }
+
+ return binomial_distribution(1, p);
+ }
The Binomial Distribution is the discrete probability
+distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
+success with probability probability
. Such a success/failure experiment is also called a Bernoulli experiment or
+Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
function binomial_distribution(trials, probability) {
Check that p
is a valid probability (0 ≤ p ≤ 1),
+that n
is an integer, strictly positive.
if (probability < 0 || probability > 1 ||
+ trials <= 0 || trials % 1 !== 0) {
+ return null;
+ }
function probability_mass(x, trials, probability) {
+ return factorial(trials) /
+ (factorial(x) * factorial(trials - x)) *
+ (Math.pow(probability, x) * Math.pow(1 - probability, trials - x));
+ }
We initialize x
, the random variable, and accumulator
, an accumulator
+for the cumulative distribution function to 0. distribution_functions
+is the object we’ll return with the probability_of_x
and the
+cumulative_probability_of_x
, as well as the calculated mean &
+variance. We iterate until the cumulative_probability_of_x
is
+within epsilon
of 1.0.
var x = 0,
+ cumulative_probability = 0,
+ cells = {};
This algorithm iterates through each potential outcome,
+until the cumulative_probability
is very close to 1, at
+which point we’ve defined the vast majority of outcomes
do {
+ cells[x] = probability_mass(x, trials, probability);
+ cumulative_probability += cells[x];
+ x++;
when the cumulative_probability is nearly 1, we’ve calculated +the useful range of this distribution
+ + } while (cumulative_probability < 1 - epsilon);
+
+ return cells;
+ }
The Poisson Distribution +is a discrete probability distribution that expresses the probability +of a given number of events occurring in a fixed interval of time +and/or space if these events occur with a known average rate and +independently of the time since the last event.
+The Poisson Distribution is characterized by the strictly positive
+mean arrival or occurrence rate, λ
.
function poisson_distribution(lambda) {
Check that lambda is strictly positive
+ + if (lambda <= 0) { return null; }
our current place in the distribution
+ + var x = 0,
and we keep track of the current cumulative probability, in +order to know when to stop calculating chances.
+ + cumulative_probability = 0,
the calculated cells to be returned
+ + cells = {};
function probability_mass(x, lambda) {
+ return (Math.pow(Math.E, -lambda) * Math.pow(lambda, x)) /
+ factorial(x);
+ }
This algorithm iterates through each potential outcome,
+until the cumulative_probability
is very close to 1, at
+which point we’ve defined the vast majority of outcomes
do {
+ cells[x] = probability_mass(x, lambda);
+ cumulative_probability += cells[x];
+ x++;
when the cumulative_probability is nearly 1, we’ve calculated +the useful range of this distribution
+ + } while (cumulative_probability < 1 - epsilon);
+
+ return cells;
+ }
The χ2 (Chi-Squared) Distribution is used in the common +chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two +criteria of classification of qualitative data, and in confidence interval estimation for a population standard +deviation of a normal distribution from a sample standard deviation.
+Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, “Probability and Statistics in +Engineering and Management Science”, Wiley (1980).
+ + var chi_squared_distribution_table = {
+ 1: { 0.995: 0.00, 0.99: 0.00, 0.975: 0.00, 0.95: 0.00, 0.9: 0.02, 0.5: 0.45, 0.1: 2.71, 0.05: 3.84, 0.025: 5.02, 0.01: 6.63, 0.005: 7.88 },
+ 2: { 0.995: 0.01, 0.99: 0.02, 0.975: 0.05, 0.95: 0.10, 0.9: 0.21, 0.5: 1.39, 0.1: 4.61, 0.05: 5.99, 0.025: 7.38, 0.01: 9.21, 0.005: 10.60 },
+ 3: { 0.995: 0.07, 0.99: 0.11, 0.975: 0.22, 0.95: 0.35, 0.9: 0.58, 0.5: 2.37, 0.1: 6.25, 0.05: 7.81, 0.025: 9.35, 0.01: 11.34, 0.005: 12.84 },
+ 4: { 0.995: 0.21, 0.99: 0.30, 0.975: 0.48, 0.95: 0.71, 0.9: 1.06, 0.5: 3.36, 0.1: 7.78, 0.05: 9.49, 0.025: 11.14, 0.01: 13.28, 0.005: 14.86 },
+ 5: { 0.995: 0.41, 0.99: 0.55, 0.975: 0.83, 0.95: 1.15, 0.9: 1.61, 0.5: 4.35, 0.1: 9.24, 0.05: 11.07, 0.025: 12.83, 0.01: 15.09, 0.005: 16.75 },
+ 6: { 0.995: 0.68, 0.99: 0.87, 0.975: 1.24, 0.95: 1.64, 0.9: 2.20, 0.5: 5.35, 0.1: 10.65, 0.05: 12.59, 0.025: 14.45, 0.01: 16.81, 0.005: 18.55 },
+ 7: { 0.995: 0.99, 0.99: 1.25, 0.975: 1.69, 0.95: 2.17, 0.9: 2.83, 0.5: 6.35, 0.1: 12.02, 0.05: 14.07, 0.025: 16.01, 0.01: 18.48, 0.005: 20.28 },
+ 8: { 0.995: 1.34, 0.99: 1.65, 0.975: 2.18, 0.95: 2.73, 0.9: 3.49, 0.5: 7.34, 0.1: 13.36, 0.05: 15.51, 0.025: 17.53, 0.01: 20.09, 0.005: 21.96 },
+ 9: { 0.995: 1.73, 0.99: 2.09, 0.975: 2.70, 0.95: 3.33, 0.9: 4.17, 0.5: 8.34, 0.1: 14.68, 0.05: 16.92, 0.025: 19.02, 0.01: 21.67, 0.005: 23.59 },
+ 10: { 0.995: 2.16, 0.99: 2.56, 0.975: 3.25, 0.95: 3.94, 0.9: 4.87, 0.5: 9.34, 0.1: 15.99, 0.05: 18.31, 0.025: 20.48, 0.01: 23.21, 0.005: 25.19 },
+ 11: { 0.995: 2.60, 0.99: 3.05, 0.975: 3.82, 0.95: 4.57, 0.9: 5.58, 0.5: 10.34, 0.1: 17.28, 0.05: 19.68, 0.025: 21.92, 0.01: 24.72, 0.005: 26.76 },
+ 12: { 0.995: 3.07, 0.99: 3.57, 0.975: 4.40, 0.95: 5.23, 0.9: 6.30, 0.5: 11.34, 0.1: 18.55, 0.05: 21.03, 0.025: 23.34, 0.01: 26.22, 0.005: 28.30 },
+ 13: { 0.995: 3.57, 0.99: 4.11, 0.975: 5.01, 0.95: 5.89, 0.9: 7.04, 0.5: 12.34, 0.1: 19.81, 0.05: 22.36, 0.025: 24.74, 0.01: 27.69, 0.005: 29.82 },
+ 14: { 0.995: 4.07, 0.99: 4.66, 0.975: 5.63, 0.95: 6.57, 0.9: 7.79, 0.5: 13.34, 0.1: 21.06, 0.05: 23.68, 0.025: 26.12, 0.01: 29.14, 0.005: 31.32 },
+ 15: { 0.995: 4.60, 0.99: 5.23, 0.975: 6.27, 0.95: 7.26, 0.9: 8.55, 0.5: 14.34, 0.1: 22.31, 0.05: 25.00, 0.025: 27.49, 0.01: 30.58, 0.005: 32.80 },
+ 16: { 0.995: 5.14, 0.99: 5.81, 0.975: 6.91, 0.95: 7.96, 0.9: 9.31, 0.5: 15.34, 0.1: 23.54, 0.05: 26.30, 0.025: 28.85, 0.01: 32.00, 0.005: 34.27 },
+ 17: { 0.995: 5.70, 0.99: 6.41, 0.975: 7.56, 0.95: 8.67, 0.9: 10.09, 0.5: 16.34, 0.1: 24.77, 0.05: 27.59, 0.025: 30.19, 0.01: 33.41, 0.005: 35.72 },
+ 18: { 0.995: 6.26, 0.99: 7.01, 0.975: 8.23, 0.95: 9.39, 0.9: 10.87, 0.5: 17.34, 0.1: 25.99, 0.05: 28.87, 0.025: 31.53, 0.01: 34.81, 0.005: 37.16 },
+ 19: { 0.995: 6.84, 0.99: 7.63, 0.975: 8.91, 0.95: 10.12, 0.9: 11.65, 0.5: 18.34, 0.1: 27.20, 0.05: 30.14, 0.025: 32.85, 0.01: 36.19, 0.005: 38.58 },
+ 20: { 0.995: 7.43, 0.99: 8.26, 0.975: 9.59, 0.95: 10.85, 0.9: 12.44, 0.5: 19.34, 0.1: 28.41, 0.05: 31.41, 0.025: 34.17, 0.01: 37.57, 0.005: 40.00 },
+ 21: { 0.995: 8.03, 0.99: 8.90, 0.975: 10.28, 0.95: 11.59, 0.9: 13.24, 0.5: 20.34, 0.1: 29.62, 0.05: 32.67, 0.025: 35.48, 0.01: 38.93, 0.005: 41.40 },
+ 22: { 0.995: 8.64, 0.99: 9.54, 0.975: 10.98, 0.95: 12.34, 0.9: 14.04, 0.5: 21.34, 0.1: 30.81, 0.05: 33.92, 0.025: 36.78, 0.01: 40.29, 0.005: 42.80 },
+ 23: { 0.995: 9.26, 0.99: 10.20, 0.975: 11.69, 0.95: 13.09, 0.9: 14.85, 0.5: 22.34, 0.1: 32.01, 0.05: 35.17, 0.025: 38.08, 0.01: 41.64, 0.005: 44.18 },
+ 24: { 0.995: 9.89, 0.99: 10.86, 0.975: 12.40, 0.95: 13.85, 0.9: 15.66, 0.5: 23.34, 0.1: 33.20, 0.05: 36.42, 0.025: 39.36, 0.01: 42.98, 0.005: 45.56 },
+ 25: { 0.995: 10.52, 0.99: 11.52, 0.975: 13.12, 0.95: 14.61, 0.9: 16.47, 0.5: 24.34, 0.1: 34.28, 0.05: 37.65, 0.025: 40.65, 0.01: 44.31, 0.005: 46.93 },
+ 26: { 0.995: 11.16, 0.99: 12.20, 0.975: 13.84, 0.95: 15.38, 0.9: 17.29, 0.5: 25.34, 0.1: 35.56, 0.05: 38.89, 0.025: 41.92, 0.01: 45.64, 0.005: 48.29 },
+ 27: { 0.995: 11.81, 0.99: 12.88, 0.975: 14.57, 0.95: 16.15, 0.9: 18.11, 0.5: 26.34, 0.1: 36.74, 0.05: 40.11, 0.025: 43.19, 0.01: 46.96, 0.005: 49.65 },
+ 28: { 0.995: 12.46, 0.99: 13.57, 0.975: 15.31, 0.95: 16.93, 0.9: 18.94, 0.5: 27.34, 0.1: 37.92, 0.05: 41.34, 0.025: 44.46, 0.01: 48.28, 0.005: 50.99 },
+ 29: { 0.995: 13.12, 0.99: 14.26, 0.975: 16.05, 0.95: 17.71, 0.9: 19.77, 0.5: 28.34, 0.1: 39.09, 0.05: 42.56, 0.025: 45.72, 0.01: 49.59, 0.005: 52.34 },
+ 30: { 0.995: 13.79, 0.99: 14.95, 0.975: 16.79, 0.95: 18.49, 0.9: 20.60, 0.5: 29.34, 0.1: 40.26, 0.05: 43.77, 0.025: 46.98, 0.01: 50.89, 0.005: 53.67 },
+ 40: { 0.995: 20.71, 0.99: 22.16, 0.975: 24.43, 0.95: 26.51, 0.9: 29.05, 0.5: 39.34, 0.1: 51.81, 0.05: 55.76, 0.025: 59.34, 0.01: 63.69, 0.005: 66.77 },
+ 50: { 0.995: 27.99, 0.99: 29.71, 0.975: 32.36, 0.95: 34.76, 0.9: 37.69, 0.5: 49.33, 0.1: 63.17, 0.05: 67.50, 0.025: 71.42, 0.01: 76.15, 0.005: 79.49 },
+ 60: { 0.995: 35.53, 0.99: 37.48, 0.975: 40.48, 0.95: 43.19, 0.9: 46.46, 0.5: 59.33, 0.1: 74.40, 0.05: 79.08, 0.025: 83.30, 0.01: 88.38, 0.005: 91.95 },
+ 70: { 0.995: 43.28, 0.99: 45.44, 0.975: 48.76, 0.95: 51.74, 0.9: 55.33, 0.5: 69.33, 0.1: 85.53, 0.05: 90.53, 0.025: 95.02, 0.01: 100.42, 0.005: 104.22 },
+ 80: { 0.995: 51.17, 0.99: 53.54, 0.975: 57.15, 0.95: 60.39, 0.9: 64.28, 0.5: 79.33, 0.1: 96.58, 0.05: 101.88, 0.025: 106.63, 0.01: 112.33, 0.005: 116.32 },
+ 90: { 0.995: 59.20, 0.99: 61.75, 0.975: 65.65, 0.95: 69.13, 0.9: 73.29, 0.5: 89.33, 0.1: 107.57, 0.05: 113.14, 0.025: 118.14, 0.01: 124.12, 0.005: 128.30 },
+ 100: { 0.995: 67.33, 0.99: 70.06, 0.975: 74.22, 0.95: 77.93, 0.9: 82.36, 0.5: 99.33, 0.1: 118.50, 0.05: 124.34, 0.025: 129.56, 0.01: 135.81, 0.005: 140.17 }
+ };
The χ2 (Chi-Squared) Goodness-of-Fit Test
+uses a measure of goodness of fit which is the sum of differences between observed and expected outcome frequencies
+(that is, counts of observations), each squared and divided by the number of observations expected given the
+hypothesized distribution. The resulting χ2 statistic, chi_squared
, can be compared to the chi-squared distribution
+to determine the goodness of fit. In order to determine the degrees of freedom of the chi-squared distribution, one
+takes the total number of observed frequencies and subtracts the number of estimated parameters. The test statistic
+follows, approximately, a chi-square distribution with (k − c) degrees of freedom where k
is the number of non-empty
+cells and c
is the number of estimated parameters for the distribution.
function chi_squared_goodness_of_fit(data, distribution_type, significance) {
Estimate from the sample data, a weighted mean.
+ + var input_mean = mean(data),
Calculated value of the χ2 statistic.
+ + chi_squared = 0,
Degrees of freedom, calculated as (number of class intervals - +number of hypothesized distribution parameters estimated - 1)
+ + degrees_of_freedom,
Number of hypothesized distribution parameters estimated, expected to be supplied in the distribution test.
+Lose one degree of freedom for estimating lambda
from the sample data.
c = 1,
The hypothesized distribution. +Generate the hypothesized distribution.
+ + hypothesized_distribution = distribution_type(input_mean),
+ observed_frequencies = [],
+ expected_frequencies = [],
+ k;
Create an array holding a histogram from the sample data, of
+the form { value: numberOfOcurrences }
for (var i = 0; i < data.length; i++) {
+ if (observed_frequencies[data[i]] === undefined) {
+ observed_frequencies[data[i]] = 0;
+ }
+ observed_frequencies[data[i]]++;
+ }
The histogram we created might be sparse - there might be gaps +between values. So we iterate through the histogram, making +sure that instead of undefined, gaps have 0 values.
+ + for (i = 0; i < observed_frequencies.length; i++) {
+ if (observed_frequencies[i] === undefined) {
+ observed_frequencies[i] = 0;
+ }
+ }
Create an array holding a histogram of expected data given the +sample size and hypothesized distribution.
+ + for (k in hypothesized_distribution) {
+ if (k in observed_frequencies) {
+ expected_frequencies[k] = hypothesized_distribution[k] * data.length;
+ }
+ }
Working backward through the expected frequencies, collapse classes +if less than three observations are expected for a class. +This transformation is applied to the observed frequencies as well.
+ + for (k = expected_frequencies.length - 1; k >= 0; k--) {
+ if (expected_frequencies[k] < 3) {
+ expected_frequencies[k - 1] += expected_frequencies[k];
+ expected_frequencies.pop();
+
+ observed_frequencies[k - 1] += observed_frequencies[k];
+ observed_frequencies.pop();
+ }
+ }
Iterate through the squared differences between observed & expected
+frequencies, accumulating the chi_squared
statistic.
for (k = 0; k < observed_frequencies.length; k++) {
+ chi_squared += Math.pow(
+ observed_frequencies[k] - expected_frequencies[k], 2) /
+ expected_frequencies[k];
+ }
Calculate degrees of freedom for this test and look it up in the
+chi_squared_distribution_table
in order to
+accept or reject the goodness-of-fit of the hypothesized distribution.
degrees_of_freedom = observed_frequencies.length - c - 1;
+ return chi_squared_distribution_table[degrees_of_freedom][significance] < chi_squared;
+ }
Mixin simple_statistics to a single Array instance if provided +or the Array native object if not. This is an optional +feature that lets you treat simple_statistics as a native feature +of Javascript.
+ + function mixin(array) {
+ var support = !!(Object.defineProperty && Object.defineProperties);
+ if (!support) throw new Error('without defineProperty, simple-statistics cannot be mixed in');
only methods which work on basic arrays in a single step +are supported
+ + var arrayMethods = ['median', 'standard_deviation', 'sum',
+ 'sample_skewness',
+ 'mean', 'min', 'max', 'quantile', 'geometric_mean',
+ 'harmonic_mean'];
create a closure with a method name so that a reference
+like arrayMethods[i]
doesn’t follow the loop increment
function wrap(method) {
+ return function() {
cast any arguments into an array, since they’re +natively objects
+ + var args = Array.prototype.slice.apply(arguments);
make the first argument the array itself
+ + args.unshift(this);
return the result of the ss method
+ + return ss[method].apply(ss, args);
+ };
+ }
select object to extend
+ + var extending;
+ if (array) {
create a shallow copy of the array so that our internal +operations do not change it by reference
+ + extending = array.slice();
+ } else {
+ extending = Array.prototype;
+ }
for each array function, define a function that gets
+the array as the first argument.
+We use defineProperty
+because it allows these properties to be non-enumerable:
+for (var in x)
loops will not run into problems with this
+implementation.
for (var i = 0; i < arrayMethods.length; i++) {
+ Object.defineProperty(extending, arrayMethods[i], {
+ value: wrap(arrayMethods[i]),
+ configurable: true,
+ enumerable: false,
+ writable: true
+ });
+ }
+
+ return extending;
+ }
+
+ ss.linear_regression = linear_regression;
+ ss.standard_deviation = standard_deviation;
+ ss.r_squared = r_squared;
+ ss.median = median;
+ ss.mean = mean;
+ ss.mode = mode;
+ ss.min = min;
+ ss.max = max;
+ ss.sum = sum;
+ ss.quantile = quantile;
+ ss.quantile_sorted = quantile_sorted;
+ ss.iqr = iqr;
+ ss.mad = mad;
+
+ ss.chunk = chunk;
+ ss.shuffle = shuffle;
+ ss.shuffle_in_place = shuffle_in_place;
+
+ ss.sample = sample;
+
+ ss.sample_covariance = sample_covariance;
+ ss.sample_correlation = sample_correlation;
+ ss.sample_variance = sample_variance;
+ ss.sample_standard_deviation = sample_standard_deviation;
+ ss.sample_skewness = sample_skewness;
+
+ ss.geometric_mean = geometric_mean;
+ ss.harmonic_mean = harmonic_mean;
+ ss.variance = variance;
+ ss.t_test = t_test;
+ ss.t_test_two_sample = t_test_two_sample;
jenks
+ + ss.jenksMatrices = jenksMatrices;
+ ss.jenksBreaks = jenksBreaks;
+ ss.jenks = jenks;
+
+ ss.bayesian = bayesian;
Distribution-related methods
+ + ss.epsilon = epsilon; // We make ε available to the test suite.
+ ss.factorial = factorial;
+ ss.bernoulli_distribution = bernoulli_distribution;
+ ss.binomial_distribution = binomial_distribution;
+ ss.poisson_distribution = poisson_distribution;
+ ss.chi_squared_goodness_of_fit = chi_squared_goodness_of_fit;
Normal distribution
+ + ss.z_score = z_score;
+ ss.cumulative_std_normal_probability = cumulative_std_normal_probability;
+ ss.standard_normal_table = standard_normal_table;
Alias this into its common name
+ + ss.average = mean;
+ ss.interquartile_range = iqr;
+ ss.mixin = mixin;
+ ss.median_absolute_deviation = mad;
+
+})(this);