From 86a8f06390543a96d2e5fc3cc1a34a320dc3696f Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Wed, 18 May 2022 22:21:51 -0700 Subject: [PATCH 1/8] updated the docs --- README.md | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3af37d66..fe35a3fb 100644 --- a/README.md +++ b/README.md @@ -24,15 +24,18 @@ For use with modern bundlers in a frontend application, simply npm i scikitjs ``` -Usage is similar to other js libraries. +Usage is similar to other js libraries. We depend on the tensorflow library in order to make our calculations fast, but we don't ship it in our bundle. We use it as a peer dependency. General usage is as follows. ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) ``` +This allows us to build a library that can be used in Deno, Node, and the browser with the same configuration. ### Backend Users -For Node.js users who wish to bind to the Tensorflow C++ library, simply +For Node.js users who wish to bind to the Tensorflow C++ library, simply import the tensorflow C++ version, and use that as the tf library ```bash npm i scikitjs @@ -41,14 +44,9 @@ npm i scikitjs But then import the node bindings ```js -import { LinearRegression } from 'scikitjs/node' -``` - -The `scikitjs/node` path uses the new "exports" feature of node (which is available in node v13.3+). 
-If you are using an older version of node, simply pass in the path to the cjs build - -```js -import { LinearRegression } from 'scikitjs/dist/cjs/index.js' +import * as tf from '@tensorflow/tfjs-node' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) ``` ### Script src @@ -57,16 +55,19 @@ For those that wish to use script src tags, simply ```html ``` ## Simple Example ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) const lr = new LinearRegression({ fitIntercept: false }) const X = [[1], [2]] // 2D Matrix with a single column vector @@ -124,7 +125,9 @@ Turns into #### JavaScript ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -154,7 +157,9 @@ Turns into #### JavaScript ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -189,7 +194,9 @@ Turns into #### JavaScript ```js -import { LogisticRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { LogisticRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [-1]] let y = [1, 0] From f66668d4160cb14a7d76cade8593d6290318218e Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Wed, 18 May 2022 22:37:40 -0700 Subject: [PATCH 2/8] updated readme --- README.md | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 3af37d66..d755753a 100644 --- a/README.md +++ b/README.md @@ -21,34 +21,33 @@ Documentation site: [www.scikitjs.org](https://www.scikitjs.org) For use with modern bundlers in a frontend application, simply ```bash +npm i 
@tensorflow/tfjs npm i scikitjs ``` -Usage is similar to other js libraries. +We depend on the tensorflow library in order to make our calculations fast, but we don't ship it in our bundle. +We use it as a peer dependency. General usage is as follows. ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import * as sk from 'scikitjs' +sk.setBackend(tf) ``` +This allows us to build a library that can be used in Deno, Node, and the browser with the same configuration. ### Backend Users -For Node.js users who wish to bind to the Tensorflow C++ library, simply +For Node.js users who wish to bind to the Tensorflow C++ library, simply import the tensorflow C++ version, and use that as the tf library ```bash +npm i @tensorflow/tfjs-node npm i scikitjs ``` -But then import the node bindings - -```js -import { LinearRegression } from 'scikitjs/node' -``` - -The `scikitjs/node` path uses the new "exports" feature of node (which is available in node v13.3+). -If you are using an older version of node, simply pass in the path to the cjs build - ```js -import { LinearRegression } from 'scikitjs/dist/cjs/index.js' +import * as tf from '@tensorflow/tfjs-node' +import * as sk from 'scikitjs' +sk.setBackend(tf) ``` ### Script src @@ -57,16 +56,21 @@ For those that wish to use script src tags, simply ```html ``` ## Simple Example ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { setBackend, LinearRegression } from 'scikitjs' +setBackend(tf) const lr = new LinearRegression({ fitIntercept: false }) const X = [[1], [2]] // 2D Matrix with a single column vector @@ -124,7 +128,9 @@ Turns into #### JavaScript ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { setBackend, LinearRegression } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -154,7 +160,9 @@ Turns into #### JavaScript ```js -import { LinearRegression } from 
'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { setBackend, LinearRegression } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -189,7 +197,9 @@ Turns into #### JavaScript ```js -import { LogisticRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs-node' +import { setBackend, LogisticRegression } from 'scikitjs' +setBackend(tf) let X = [[1], [-1]] let y = [1, 0] From f20e5c42e0be86a38429623b4c9df9c1edaf24a8 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 21 May 2022 10:22:31 -0700 Subject: [PATCH 3/8] more updates to readme --- README.md | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d755753a..07fd6a6d 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,7 @@ Documentation site: [www.scikitjs.org](https://www.scikitjs.org) For use with modern bundlers in a frontend application, simply ```bash -npm i @tensorflow/tfjs -npm i scikitjs +npm i @tensorflow/tfjs scikitjs ``` We depend on the tensorflow library in order to make our calculations fast, but we don't ship it in our bundle. @@ -40,16 +39,24 @@ This allows us to build a library that can be used in Deno, Node, and the browse For Node.js users who wish to bind to the Tensorflow C++ library, simply import the tensorflow C++ version, and use that as the tf library ```bash -npm i @tensorflow/tfjs-node -npm i scikitjs +npm i @tensorflow/tfjs-node scikitjs ``` +```js +const tf = require('@tensorflow/tfjs-node') +const sk = require('scikitjs') +sk.setBackend(tf) +``` + +Note: If you have ESM enabled (by setting type="module" in your package.json), then you can consume this libary with import / export, like in the following code block. 
+ ```js import * as tf from '@tensorflow/tfjs-node' import * as sk from 'scikitjs' sk.setBackend(tf) ``` + ### Script src For those that wish to use script src tags, simply @@ -68,7 +75,7 @@ For those that wish to use script src tags, simply ## Simple Example ```js -import * as tf from '@tensorflow/tfjs-node' +import * as tf from '@tensorflow/tfjs' import { setBackend, LinearRegression } from 'scikitjs' setBackend(tf) @@ -128,7 +135,7 @@ Turns into #### JavaScript ```js -import * as tf from '@tensorflow/tfjs-node' +import * as tf from '@tensorflow/tfjs' import { setBackend, LinearRegression } from 'scikitjs' setBackend(tf) @@ -160,7 +167,7 @@ Turns into #### JavaScript ```js -import * as tf from '@tensorflow/tfjs-node' +import * as tf from '@tensorflow/tfjs' import { setBackend, LinearRegression } from 'scikitjs' setBackend(tf) @@ -197,7 +204,7 @@ Turns into #### JavaScript ```js -import * as tf from '@tensorflow/tfjs-node' +import * as tf from '@tensorflow/tfjs' import { setBackend, LogisticRegression } from 'scikitjs' setBackend(tf) From 45c4305983c78f876457c35be602362a6935b6f8 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 21 May 2022 12:20:13 -0700 Subject: [PATCH 4/8] updated docs --- README.md | 2 +- docs/docs/tutorial.md | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 07fd6a6d..564093d7 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ const sk = require('scikitjs') sk.setBackend(tf) ``` -Note: If you have ESM enabled (by setting type="module" in your package.json), then you can consume this libary with import / export, like in the following code block. +Note: If you have ESM enabled (by setting type="module" in your package.json), then you can consume this library with import / export, like in the following code block. 
```js import * as tf from '@tensorflow/tfjs-node' diff --git a/docs/docs/tutorial.md b/docs/docs/tutorial.md index 75ca34ab..d2517358 100644 --- a/docs/docs/tutorial.md +++ b/docs/docs/tutorial.md @@ -11,13 +11,13 @@ Let's discover **Scikit.js in less than 5 minutes**. Get started by **installing the library**. ```shell -npm install scikitjs +npm install scikitjs @tensorflow/tfjs ``` or ```shell -yarn add scikitjs +yarn add scikitjs @tensorflow/tfjs ``` ## Build a model @@ -25,7 +25,9 @@ yarn add scikitjs Build a simple Linear Regression ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [ [2, 3], From 81634833bb6063468e8668aba5483e0b0e02eef4 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 21 May 2022 12:27:50 -0700 Subject: [PATCH 5/8] better tutorial docs --- docs/docs/tutorial.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/docs/tutorial.md b/docs/docs/tutorial.md index d2517358..8b1014b1 100644 --- a/docs/docs/tutorial.md +++ b/docs/docs/tutorial.md @@ -8,7 +8,7 @@ Let's discover **Scikit.js in less than 5 minutes**. ## Getting Started -Get started by **installing the library**. +Get started by **installing the library as well as its dependencies**.
```shell npm install scikitjs @tensorflow/tfjs @@ -25,10 +25,12 @@ yarn add scikitjs @tensorflow/tfjs Build a simple Linear Regression ```js +// import tensorflow and register it as the backend import * as tf from '@tensorflow/tfjs' import { LinearRegression, setBackend } from 'scikitjs' setBackend(tf) +// Perform a linear regression let X = [ [2, 3], [1, 4], From 63a2197f877be21399e0dee3206729ed424552d0 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 21 May 2022 12:49:54 -0700 Subject: [PATCH 6/8] more updates --- docs/docs/python.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/docs/python.md b/docs/docs/python.md index deb4ee06..081808c3 100644 --- a/docs/docs/python.md +++ b/docs/docs/python.md @@ -47,7 +47,9 @@ Turns into #### javascript ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -77,7 +79,9 @@ Turns into #### javascript ```js -import { LinearRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import { LinearRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [2]] let y = [10, 20] @@ -112,7 +116,9 @@ Turns into #### javascript ```js -import { LogisticRegression } from 'scikitjs' +import * as tf from '@tensorflow/tfjs' +import { LogisticRegression, setBackend } from 'scikitjs' +setBackend(tf) let X = [[1], [-1]] let y = [1, 0] From 10141cd9213a2da7aa1d3122f3143d29abb574d8 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sun, 22 May 2022 01:50:29 -0700 Subject: [PATCH 7/8] feat: sgd classifier can now train on categorical variables, as well as one-hot encoded variables --- src/linear_model/LogisticRegression.test.ts | 62 +++++++++++++++++++++ src/linear_model/SgdClassifier.ts | 30 ++++++---- src/mixins.ts | 13 ++++- 3 files changed, 92 insertions(+), 13 deletions(-) diff --git
a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index 1d38ba59..80b39648 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -47,6 +47,68 @@ describe('LogisticRegression', function () { expect(results.arraySync()).toEqual([0, 0, 0, 1, 1, 1]) expect(logreg.score(X, y) > 0.5).toBe(true) }, 30000) + it('Test of the function used with 2 classes (one hot)', async function () { + let X = [ + [0, -1], + [1, 0], + [1, 1], + [1, -1], + [2, 0], + [2, 1], + [2, -1], + [3, 2], + [0, 4], + [1, 3], + [1, 4], + [1, 5], + [2, 3], + [2, 4], + [2, 5], + [3, 4] + ] + let y = [ + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1], + [0, 1] + ] + + let Xtest = [ + [0, -2], + [1, 0.5], + [1.5, -1], + [1, 4.5], + [2, 3.5], + [1.5, 5] + ] + + let logreg = new LogisticRegression({ penalty: 'none' }) + await logreg.fit(X, y) + let probabilities = logreg.predictProba(X) + expect(probabilities instanceof tf.Tensor).toBe(true) + let results = logreg.predict(Xtest) // compute results of the training set + expect(results.arraySync()).toEqual([ + [1, 0], + [1, 0], + [1, 0], + [0, 1], + [0, 1], + [0, 1] + ]) + expect(logreg.score(X, y) > 0.5).toBe(true) + }, 30000) it('Test of the prediction with 3 classes', async function () { let X = [ [0, -1], diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index 56a55e16..bc9ee150 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -13,7 +13,10 @@ * ========================================================================== */ -import { convertToNumericTensor1D, convertToNumericTensor2D } from '../utils' +import { + convertToNumericTensor1D_2D, + convertToNumericTensor2D +} from '../utils' import { Scikit2D, Scikit1D, @@ -23,8 +26,7 @@ import { Tensor2D, Tensor, ModelCompileArgs, - ModelFitArgs, 
- RecursiveArray + ModelFitArgs } from '../types' import { OneHotEncoder } from '../preprocessing/OneHotEncoder' import { assert } from '../typesUtils' @@ -103,6 +105,7 @@ export class SGDClassifier extends ClassifierMixin { lossType: LossTypes oneHot: OneHotEncoder tf: any + isMultiOutput: boolean constructor({ modelFitArgs, @@ -119,6 +122,7 @@ export class SGDClassifier extends ClassifierMixin { this.denseLayerArgs = denseLayerArgs this.optimizerType = optimizerType this.lossType = lossType + this.isMultiOutput = false // Next steps: Implement "drop" mechanics for OneHotEncoder // There is a possibility to do a drop => if_binary which would // squash down on the number of variables that we'd have to learn @@ -200,12 +204,17 @@ export class SGDClassifier extends ClassifierMixin { * // lr model weights have been updated */ - public async fit(X: Scikit2D, y: Scikit1D): Promise { + public async fit( + X: Scikit2D, + y: Scikit1D | Scikit2D + ): Promise { let XTwoD = convertToNumericTensor2D(X) - let yOneD = convertToNumericTensor1D(y) + let yOneD = convertToNumericTensor1D_2D(y) const yTwoD = this.initializeModelForClassification(yOneD) - + if (yOneD.shape.length > 1) { + this.isMultiOutput = true + } if (this.model.layers.length === 0) { this.initializeModel(XTwoD, yTwoD) } @@ -344,6 +353,9 @@ export class SGDClassifier extends ClassifierMixin { public predict(X: Scikit2D): Tensor1D { assert(this.model.layers.length > 0, 'Need to call "fit" before "predict"') const y2D = this.predictProba(X) + if (this.isMultiOutput) { + return this.tf.oneHot(y2D.argMax(1), y2D.shape[1]) + } return this.tf.tensor1d(this.oneHot.inverseTransform(y2D)) } @@ -418,10 +430,4 @@ export class SGDClassifier extends ClassifierMixin { return intercept } - - private getModelWeight(): Promise> { - return Promise.all( - this.model.getWeights().map((weight: any) => weight.array()) - ) - } } diff --git a/src/mixins.ts b/src/mixins.ts index 2d71b816..225f7f74 100644 --- a/src/mixins.ts +++ 
b/src/mixins.ts @@ -1,6 +1,8 @@ import { Scikit2D, Scikit1D, Tensor2D, Tensor1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' import { Serialize } from './simpleSerializer' +import { assert, isScikit2D } from './typesUtils' +import { convertToNumericTensor1D_2D } from './utils' export class TransformerMixin extends Serialize { // We assume that fit and transform exist [x: string]: any @@ -35,8 +37,17 @@ export class ClassifierMixin extends Serialize { [x: string]: any EstimatorType = 'classifier' - public score(X: Scikit2D, y: Scikit1D): number { + public score(X: Scikit2D, y: Scikit1D | Scikit2D): number { const yPred = this.predict(X) + const yTrue = convertToNumericTensor1D_2D(y) + assert( + yPred.shape.length === yTrue.shape.length, + "The shape of the model output doesn't match the shape of the actual y values" + ) + + if (isScikit2D(y)) { + return accuracyScore(yTrue.argMax(1) as Scikit1D, yPred.argMax(1)) + } return accuracyScore(y, yPred) } } From f388a67c0127a047b70971b790cfd29e7aa3f709 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 22 May 2022 09:20:17 +0000 Subject: [PATCH 8/8] chore(release): 1.24.0 [skip ci] # [1.24.0](https://github.com/javascriptdata/scikit.js/compare/v1.23.0...v1.24.0) (2022-05-22) ### Features * sgd classifier can now train on categorical variables, as well as one-hot encoded variables ([10141cd](https://github.com/javascriptdata/scikit.js/commit/10141cd9213a2da7aa1d3122f3143d29abb574d8)) --- CHANGELOG.md | 7 +++++++ package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a99ede5d..69530790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# [1.24.0](https://github.com/javascriptdata/scikit.js/compare/v1.23.0...v1.24.0) (2022-05-22) + + +### Features + +* sgd classifier can now train on categorical variables, as well as one-hot encoded variables
([10141cd](https://github.com/javascriptdata/scikit.js/commit/10141cd9213a2da7aa1d3122f3143d29abb574d8)) + # [1.23.0](https://github.com/javascriptdata/scikit.js/compare/v1.22.0...v1.23.0) (2022-05-19) diff --git a/package-lock.json b/package-lock.json index 04e3044b..8fddc134 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scikitjs", - "version": "1.23.0", + "version": "1.24.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scikitjs", - "version": "1.23.0", + "version": "1.24.0", "hasInstallScript": true, "license": "ISC", "dependencies": { diff --git a/package.json b/package.json index 040769cd..9537fb55 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scikitjs", - "version": "1.23.0", + "version": "1.24.0", "description": "Scikit-Learn for JS", "output": { "node": "dist/node/index.js",