diff --git a/CHANGELOG.md b/CHANGELOG.md
index a99ede5d..69530790 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+# [1.24.0](https://github.com/javascriptdata/scikit.js/compare/v1.23.0...v1.24.0) (2022-05-22)
+
+
+### Features
+
+* sgd classifier can now train on categorical variables, as well as one-hot encoded variables ([10141cd](https://github.com/javascriptdata/scikit.js/commit/10141cd9213a2da7aa1d3122f3143d29abb574d8))
+
# [1.23.0](https://github.com/javascriptdata/scikit.js/compare/v1.22.0...v1.23.0) (2022-05-19)
diff --git a/README.md b/README.md
index 3af37d66..564093d7 100644
--- a/README.md
+++ b/README.md
@@ -21,52 +21,63 @@ Documentation site: [www.scikitjs.org](https://www.scikitjs.org)
For use with modern bundlers in a frontend application, simply
```bash
-npm i scikitjs
+npm i @tensorflow/tfjs scikitjs
```
-Usage is similar to other js libraries.
+We depend on the tensorflow library in order to make our calculations fast, but we don't ship it in our bundle.
+We use it as a peer dependency. General usage is as follows.
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import * as sk from 'scikitjs'
+sk.setBackend(tf)
```
+This allows us to build a library that can be used in Deno, Node, and the browser with the same configuration.
### Backend Users
-For Node.js users who wish to bind to the Tensorflow C++ library, simply
+For Node.js users who wish to bind to the Tensorflow C++ library, simply import the tensorflow C++ version, and use that as the tf library
```bash
-npm i scikitjs
+npm i @tensorflow/tfjs-node scikitjs
```
-But then import the node bindings
-
```js
-import { LinearRegression } from 'scikitjs/node'
+const tf = require('@tensorflow/tfjs-node')
+const sk = require('scikitjs')
+sk.setBackend(tf)
```
-The `scikitjs/node` path uses the new "exports" feature of node (which is available in node v13.3+).
-If you are using an older version of node, simply pass in the path to the cjs build
+Note: If you have ESM enabled (by setting `"type": "module"` in your package.json), then you can consume this library with import / export, like in the following code block.
```js
-import { LinearRegression } from 'scikitjs/dist/cjs/index.js'
+import * as tf from '@tensorflow/tfjs-node'
+import * as sk from 'scikitjs'
+sk.setBackend(tf)
```
+
### Script src
For those that wish to use script src tags, simply
```html
```
## Simple Example
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { setBackend, LinearRegression } from 'scikitjs'
+setBackend(tf)
const lr = new LinearRegression({ fitIntercept: false })
const X = [[1], [2]] // 2D Matrix with a single column vector
@@ -124,7 +135,9 @@ Turns into
#### JavaScript
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { setBackend, LinearRegression } from 'scikitjs'
+setBackend(tf)
let X = [[1], [2]]
let y = [10, 20]
@@ -154,7 +167,9 @@ Turns into
#### JavaScript
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { setBackend, LinearRegression } from 'scikitjs'
+setBackend(tf)
let X = [[1], [2]]
let y = [10, 20]
@@ -189,7 +204,9 @@ Turns into
#### JavaScript
```js
-import { LogisticRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { setBackend, LogisticRegression } from 'scikitjs'
+setBackend(tf)
let X = [[1], [-1]]
let y = [1, 0]
diff --git a/docs/docs/python.md b/docs/docs/python.md
index deb4ee06..081808c3 100644
--- a/docs/docs/python.md
+++ b/docs/docs/python.md
@@ -47,7 +47,9 @@ Turns into
#### javascript
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { LinearRegression, setBackend } from 'scikitjs'
+setBackend(tf)
let X = [[1], [2]]
let y = [10, 20]
@@ -77,7 +79,9 @@ Turns into
#### javascript
```js
-import { LinearRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { LinearRegression, setBackend } from 'scikitjs'
+setBackend(tf)
let X = [[1], [2]]
let y = [10, 20]
@@ -112,7 +116,9 @@ Turns into
#### javascript
```js
-import { LogisticRegression } from 'scikitjs'
+import * as tf from '@tensorflow/tfjs'
+import { LogisticRegression, setBackend } from 'scikitjs'
+setBackend(tf)
let X = [[1], [-1]]
let y = [1, 0]
diff --git a/docs/docs/tutorial.md b/docs/docs/tutorial.md
index 75ca34ab..8b1014b1 100644
--- a/docs/docs/tutorial.md
+++ b/docs/docs/tutorial.md
@@ -8,16 +8,16 @@ Let's discover **Scikit.js in less than 5 minutes**.
## Getting Started
-Get started by **installing the library**.
+Get started by **installing the library as well as its dependencies**.
```shell
-npm install scikitjs
+npm install scikitjs @tensorflow/tfjs
```
or
```shell
-yarn add scikitjs
+yarn add scikitjs @tensorflow/tfjs
```
## Build a model
@@ -25,8 +25,12 @@ yarn add scikitjs
Build a simple Linear Regression
```js
-import { LinearRegression } from 'scikitjs'
+// import tensorflow and register it as the backend
+import * as tf from '@tensorflow/tfjs'
+import { LinearRegression, setBackend } from 'scikitjs'
+setBackend(tf)
+// Perform a linear regression
let X = [
[2, 3],
[1, 4],
diff --git a/package-lock.json b/package-lock.json
index 04e3044b..8fddc134 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "scikitjs",
- "version": "1.23.0",
+ "version": "1.24.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "scikitjs",
- "version": "1.23.0",
+ "version": "1.24.0",
"hasInstallScript": true,
"license": "ISC",
"dependencies": {
diff --git a/package.json b/package.json
index 040769cd..9537fb55 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "scikitjs",
- "version": "1.23.0",
+ "version": "1.24.0",
"description": "Scikit-Learn for JS",
"output": {
"node": "dist/node/index.js",
diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts
index 1d38ba59..80b39648 100644
--- a/src/linear_model/LogisticRegression.test.ts
+++ b/src/linear_model/LogisticRegression.test.ts
@@ -47,6 +47,68 @@ describe('LogisticRegression', function () {
expect(results.arraySync()).toEqual([0, 0, 0, 1, 1, 1])
expect(logreg.score(X, y) > 0.5).toBe(true)
}, 30000)
+ it('Test of the function used with 2 classes (one hot)', async function () {
+ let X = [
+ [0, -1],
+ [1, 0],
+ [1, 1],
+ [1, -1],
+ [2, 0],
+ [2, 1],
+ [2, -1],
+ [3, 2],
+ [0, 4],
+ [1, 3],
+ [1, 4],
+ [1, 5],
+ [2, 3],
+ [2, 4],
+ [2, 5],
+ [3, 4]
+ ]
+ let y = [ // one-hot labels: [1, 0] for the first eight rows of X, [0, 1] for the last eight
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [0, 1],
+ [0, 1],
+ [0, 1],
+ [0, 1],
+ [0, 1],
+ [0, 1],
+ [0, 1],
+ [0, 1]
+ ]
+
+ let Xtest = [
+ [0, -2],
+ [1, 0.5],
+ [1.5, -1],
+ [1, 4.5],
+ [2, 3.5],
+ [1.5, 5]
+ ]
+
+ let logreg = new LogisticRegression({ penalty: 'none' })
+ await logreg.fit(X, y)
+ let probabilities = logreg.predictProba(X)
+ expect(probabilities instanceof tf.Tensor).toBe(true)
+ let results = logreg.predict(Xtest) // predictions for Xtest (points that are not in the training data X)
+ expect(results.arraySync()).toEqual([
+ [1, 0],
+ [1, 0],
+ [1, 0],
+ [0, 1],
+ [0, 1],
+ [0, 1]
+ ])
+ expect(logreg.score(X, y) > 0.5).toBe(true)
+ }, 30000)
it('Test of the prediction with 3 classes', async function () {
let X = [
[0, -1],
diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts
index 56a55e16..bc9ee150 100644
--- a/src/linear_model/SgdClassifier.ts
+++ b/src/linear_model/SgdClassifier.ts
@@ -13,7 +13,10 @@
* ==========================================================================
*/
-import { convertToNumericTensor1D, convertToNumericTensor2D } from '../utils'
+import {
+ convertToNumericTensor1D_2D,
+ convertToNumericTensor2D
+} from '../utils'
import {
Scikit2D,
Scikit1D,
@@ -23,8 +26,7 @@ import {
Tensor2D,
Tensor,
ModelCompileArgs,
- ModelFitArgs,
- RecursiveArray
+ ModelFitArgs
} from '../types'
import { OneHotEncoder } from '../preprocessing/OneHotEncoder'
import { assert } from '../typesUtils'
@@ -103,6 +105,7 @@ export class SGDClassifier extends ClassifierMixin {
lossType: LossTypes
oneHot: OneHotEncoder
tf: any
+ isMultiOutput: boolean
constructor({
modelFitArgs,
@@ -119,6 +122,7 @@ export class SGDClassifier extends ClassifierMixin {
this.denseLayerArgs = denseLayerArgs
this.optimizerType = optimizerType
this.lossType = lossType
+ this.isMultiOutput = false
// Next steps: Implement "drop" mechanics for OneHotEncoder
// There is a possibility to do a drop => if_binary which would
// squash down on the number of variables that we'd have to learn
@@ -200,12 +204,17 @@ export class SGDClassifier extends ClassifierMixin {
* // lr model weights have been updated
*/
- public async fit(X: Scikit2D, y: Scikit1D): Promise {
+ public async fit(
+ X: Scikit2D,
+ y: Scikit1D | Scikit2D
+ ): Promise {
let XTwoD = convertToNumericTensor2D(X)
- let yOneD = convertToNumericTensor1D(y)
+ let yOneD = convertToNumericTensor1D_2D(y)
const yTwoD = this.initializeModelForClassification(yOneD)
-
+ if (yOneD.shape.length > 1) {
+ this.isMultiOutput = true
+ }
if (this.model.layers.length === 0) {
this.initializeModel(XTwoD, yTwoD)
}
@@ -344,6 +353,9 @@ export class SGDClassifier extends ClassifierMixin {
public predict(X: Scikit2D): Tensor1D {
assert(this.model.layers.length > 0, 'Need to call "fit" before "predict"')
const y2D = this.predictProba(X)
+ if (this.isMultiOutput) { // flag is set by fit() when the labels it received had more than one dimension
+ return this.tf.oneHot(y2D.argMax(1), y2D.shape[1]) // one-hot of each row's argMax column; NOTE(review): this is a rank-2 result although the signature declares Tensor1D
+ }
return this.tf.tensor1d(this.oneHot.inverseTransform(y2D))
}
@@ -418,10 +430,4 @@ export class SGDClassifier extends ClassifierMixin {
return intercept
}
-
- private getModelWeight(): Promise> {
- return Promise.all(
- this.model.getWeights().map((weight: any) => weight.array())
- )
- }
}
diff --git a/src/mixins.ts b/src/mixins.ts
index 2d71b816..225f7f74 100644
--- a/src/mixins.ts
+++ b/src/mixins.ts
@@ -1,6 +1,8 @@
import { Scikit2D, Scikit1D, Tensor2D, Tensor1D } from './types'
import { r2Score, accuracyScore } from './metrics/metrics'
import { Serialize } from './simpleSerializer'
+import { assert, isScikit2D } from './typesUtils'
+import { convertToNumericTensor1D_2D } from './utils'
export class TransformerMixin extends Serialize {
// We assume that fit and transform exist
[x: string]: any
@@ -35,8 +37,17 @@ export class ClassifierMixin extends Serialize {
[x: string]: any
EstimatorType = 'classifier'
- public score(X: Scikit2D, y: Scikit1D): number {
+ public score(X: Scikit2D, y: Scikit1D | Scikit2D): number {
const yPred = this.predict(X)
+ const yTrue = convertToNumericTensor1D_2D(y) // tensor form of y, used for the rank check and the argMax comparison below
+ assert(
+ yPred.shape.length === yTrue.shape.length,
+ "The shape of the model output doesn't match the shape of the actual y values"
+ )
+
+ if (isScikit2D(y)) { // 2D labels: compare per-row argMax indices rather than whole rows
+ return accuracyScore(yTrue.argMax(1) as Scikit1D, yPred.argMax(1))
+ }
return accuracyScore(y, yPred)
}
}