Built BanditEnv

jscriptcoder · jscriptcoder · commit 0a7f6f314871 · 2020-10-23T17:35:45.000+02:00
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,29 @@
+# EditorConfig is awesome: https://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# Matches multiple files with brace expansion notation
+# Set default charset
+[*.{ts,py}]
+charset = utf-8
+
+# 4 space indentation
+[*.py]
+indent_style = space
+indent_size = 4
+
+# 2 space indentation for TypeScript files
+[*.ts]
+indent_style = space
+indent_size = 2
+
+# Matches exactly the files package.json and .travis.yml
+[{package.json,.travis.yml}]
+indent_style = space
+indent_size = 2
diff --git a/noderl/environments/BanditEnv.test.ts b/noderl/environments/BanditEnv.test.ts
@@ -0,0 +1,41 @@
+import test from 'tape'
+import BanditEnv from './BanditEnv'
+
+// Upper bound on pulls per arm. The least likely arm pays off with probability
+// 0.2, so exhausting this many pulls without a payout indicates a bug rather
+// than bad luck, and the test fails instead of spinning forever.
+const MAX_ATTEMPTS = 10000
+
+// The constructor must reject empty, mismatched and out-of-range distributions.
+test('new BanditEnv', t => {
+  t.throws(() => new BanditEnv([], []), 'Empty probability distribution')
+  t.throws(() => new BanditEnv([1], []), 'Empty reward distribution')
+  t.throws(() => new BanditEnv([1, 0], [1]), 'Probability and Reward distribution must be the same length')
+  t.throws(() => new BanditEnv([-1, 1], [1, 1]), 'All probabilities must be greater or equal to 0')
+  t.throws(() => new BanditEnv([0, 2], [1, 1]), 'All probabilities must be less or equal to 1')
+  t.doesNotThrow(() => new BanditEnv([0, 1], [1, 1]), 'Bandit environment correctly initialized')
+  t.end()
+})
+
+// pull() must reject indices outside the arm range, and every arm must be able
+// to pay its configured reward.
+test('BanditEnv#pull', t => {
+  const probs = [0.2, 0.5, 0.75]
+  const rewards = [8, 5, 2.5]
+  const bandit = new BanditEnv(probs, rewards)
+
+  t.throws(() => bandit.pull(-1), 'Wrong action passed in (negative index)')
+  t.throws(() => bandit.pull(probs.length), 'Wrong action passed in (index past the last arm)')
+
+  rewards.forEach((reward, arm) => {
+    let attempts = 0
+    let paidOff = false
+    while (!paidOff && attempts < MAX_ATTEMPTS) {
+      attempts++
+      paidOff = bandit.pull(arm) === reward
+    }
+    t.ok(paidOff, `Arm ${arm} paid off within ${attempts} attempts`)
+  })
+
+  t.end()
+})
diff --git a/noderl/environments/BanditEnv.ts b/noderl/environments/BanditEnv.ts
@@ -0,0 +1,54 @@
+import { min, max } from '../utils/lists'
+import { uniform } from '../utils/random'
+import { assert } from '../utils/assertion'
+
+/**
+ * Multi-armed bandit environment.
+ *
+ * Pulling arm `i` pays `r_dist[i]` with probability `p_dist[i]` and 0 otherwise.
+ */
+export default class BanditEnv {
+
+  private readonly n_arms: number
+  private readonly p_dist: number[]
+  private readonly r_dist: number[]
+
+  /**
+   * @param p_dist Payout probability of each arm; every value must be in [0, 1].
+   * @param r_dist Reward paid by each arm on a winning pull; same length as `p_dist`.
+   * @throws AssertionError if either list is empty, the lengths differ, or a
+   *   probability lies outside [0, 1].
+   */
+  constructor(p_dist: number[], r_dist: number[]) {
+
+    assert(p_dist.length !== 0, 'Empty probability distribution')
+    assert(r_dist.length !== 0, 'Empty reward distribution')
+    assert(p_dist.length === r_dist.length, 'Probability and Reward distribution must be the same length')
+    assert(min(p_dist) >= 0 && max(p_dist) <= 1, 'All probabilities must be between 0 and 1')
+
+    this.n_arms = p_dist.length
+    // Keep private copies so that later changes to the caller's arrays cannot
+    // bypass the validation above.
+    this.p_dist = [...p_dist]
+    this.r_dist = [...r_dist]
+  }
+
+  /**
+   * Pulls one arm of the bandit.
+   *
+   * @param action Zero-based arm index; must be an integer in [0, number of arms).
+   * @returns The arm's reward when the pull pays off, otherwise 0.
+   * @throws AssertionError if `action` is not a valid arm index.
+   */
+  pull(action: number): number {
+    // A fractional index passes a plain range check but reads `undefined` from
+    // the distributions, which silently yields 0; require an integer as well.
+    assert(
+      Number.isInteger(action) && action >= 0 && action < this.n_arms,
+      `Wrong action passed in: "${action}"`
+    )
+
+    // The arm pays off when a uniform draw falls below its probability.
+    return uniform() < this.p_dist[action] ? this.r_dist[action] : 0
+  }
+}
diff --git a/noderl/utils/assertion.ts b/noderl/utils/assertion.ts
@@ -0,0 +1,29 @@
+/**
+ * Error thrown by `assert` when its condition does not hold.
+ */
+export class AssertionError extends Error {
+  name: string = 'AssertionError'
+  constructor(message?: string) {
+    super(message)
+    // When compiled to ES5, `super(...)` on a built-in Error returns a plain
+    // Error, so `instanceof AssertionError` would be false. Restoring the
+    // prototype keeps `instanceof` working on every compile target.
+    Object.setPrototypeOf(this, new.target.prototype)
+  }
+}
+
+/**
+ * Throws an `AssertionError` carrying `message` unless `condition` is true.
+ *
+ * The `asserts condition` return type lets the compiler narrow types after
+ * the call.
+ *
+ * @param condition Expression that must hold.
+ * @param message Text of the error thrown when `condition` is false.
+ * @throws AssertionError when `condition` is false.
+ */
+export function assert(condition: boolean, message: string): asserts condition {
+  if (!condition) {
+    throw new AssertionError(message)
+  }
+}
diff --git a/noderl/utils/lists.ts b/noderl/utils/lists.ts
@@ -0,0 +1,28 @@
+/**
+ * Returns the smallest number in `list`.
+ *
+ * Folds over the list instead of spreading it into `Math.min`, because a
+ * spread call passes every element as a separate argument and throws a
+ * RangeError on very large arrays.
+ *
+ * @param list Numbers to scan.
+ * @returns The minimum value, `Infinity` for an empty list (same as
+ *   `Math.min()`), or `NaN` if any element is `NaN`.
+ */
+export function min(list: readonly number[]): number {
+  return list.reduce((lowest, value) => Math.min(lowest, value), Infinity)
+}
+
+/**
+ * Returns the largest number in `list`.
+ *
+ * Folds over the list for the same reason as `min`: spreading a very large
+ * array into `Math.max` throws a RangeError.
+ *
+ * @param list Numbers to scan.
+ * @returns The maximum value, `-Infinity` for an empty list (same as
+ *   `Math.max()`), or `NaN` if any element is `NaN`.
+ */
+export function max(list: readonly number[]): number {
+  return list.reduce((highest, value) => Math.max(highest, value), -Infinity)
+}
diff --git a/noderl/utils/random.ts b/noderl/utils/random.ts
@@ -0,0 +1,14 @@
+/**
+ * Draws a pseudo-random number uniformly from `[low, high)`.
+ *
+ * With no arguments this returns exactly what `Math.random()` returns, i.e. a
+ * value in `[0, 1)`.
+ *
+ * @param low Lower bound of the interval (inclusive). Defaults to 0.
+ * @param high Upper bound of the interval. Defaults to 1. Exclusive in exact
+ *   arithmetic; for wide ranges floating-point rounding may return `high`.
+ * @returns `low + (high - low) * Math.random()`.
+ */
+export function uniform(low = 0, high = 1): number {
+  return low + (high - low) * Math.random()
+}
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
diff --git a/tsconfig.json b/tsconfig.json

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+export function uniform() {`
	`2`	`+ return Math.random()`
	`3`	`+}`