diff --git a/.gitignore b/.gitignore index 70fcf5d..50f3249 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ build/ tmp/ *.o *.mpy + +venv/ + +.ipynb_checkpoints/ +__pycache__/ diff --git a/Makefile b/Makefile index ca58afd..0a4606d 100644 --- a/Makefile +++ b/Makefile @@ -10,54 +10,60 @@ MPY_DIR_ABS = $(abspath $(MPY_DIR)) MODULES_PATH = ./dist/$(ARCH)_$(MPY_ABI_VERSION) -$(MODULES_PATH)/emltrees.mpy: - make -C src/emltrees/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_trees.mpy: + make -C src/emlearn_trees/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/emlneighbors.mpy: - make -C src/emlneighbors/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_neighbors.mpy: + make -C src/emlearn_neighbors/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/emliir.mpy: - make -C src/emliir/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_iir.mpy: + make -C src/emlearn_iir/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/emlfft.mpy: - make -C src/emlfft/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_fft.mpy: + make -C src/emlearn_fft/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/tinymaix_cnn.mpy: +$(MODULES_PATH)/emlearn_cnn.mpy: make -C src/tinymaix_cnn/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/emlkmeans.mpy: - make -C src/emlkmeans/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_kmeans.mpy: + make -C src/emlearn_kmeans/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -$(MODULES_PATH)/eml_iir_q15.mpy: - make -C src/eml_iir_q15/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist +$(MODULES_PATH)/emlearn_iir_q15.mpy: + make -C src/emlearn_iir_q15/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist -emltrees.results: $(MODULES_PATH)/emltrees.mpy +$(MODULES_PATH)/emlearn_arrayutils.mpy: + make -C src/emlearn_arrayutils/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean dist + +emlearn_trees.results: $(MODULES_PATH)/emlearn_trees.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_trees.py -emlneighbors.results: $(MODULES_PATH)/emlneighbors.mpy +emlearn_neighbors.results: $(MODULES_PATH)/emlearn_neighbors.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_neighbors.py -emliir.results: $(MODULES_PATH)/emliir.mpy +emlearn_iir.results: $(MODULES_PATH)/emlearn_iir.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_iir.py -emlfft.results: $(MODULES_PATH)/emlfft.mpy +emlearn_fft.results: $(MODULES_PATH)/emlearn_fft.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_fft.py -tinymaix_cnn.results: $(MODULES_PATH)/tinymaix_cnn.mpy +emlearn_cnn.results: $(MODULES_PATH)/emlearn_cnn.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_cnn.py -emlkmeans.results: $(MODULES_PATH)/emlkmeans.mpy +emlearn_kmeans.results: $(MODULES_PATH)/emlearn_kmeans.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_kmeans.py -eml_iir_q15.results: $(MODULES_PATH)/eml_iir_q15.mpy +emlearn_iir_q15.results: $(MODULES_PATH)/emlearn_iir_q15.mpy MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_iir_q15.py +emlearn_arrayutils.results: $(MODULES_PATH)/emlearn_arrayutils.mpy + MICROPYPATH=$(MODULES_PATH) $(MICROPYTHON_BIN) tests/test_arrayutils.py + .PHONY: clean clean: - make -C src/emltrees/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean - make -C src/emlneighbors/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean - make -C 
src/emliir/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean + make -C src/emlearn_trees/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean + make -C src/emlearn_neighbors/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean + make -C src/emlearn_iir/ ARCH=$(ARCH) MPY_DIR=$(MPY_DIR_ABS) V=1 clean rm -rf ./dist RELEASE_NAME = emlearn-micropython-$(VERSION) @@ -68,8 +74,8 @@ release: zip -r $(RELEASE_NAME).zip $(RELEASE_NAME) #cp $(RELEASE_NAME).zip emlearn-micropython-latest.zip -check: emltrees.results emlneighbors.results emliir.results eml_iir_q15.results emlfft.results emlkmeans.results tinymaix_cnn.results +check: emlearn_trees.results emlearn_neighbors.results emlearn_iir.results emlearn_iir_q15.results emlearn_fft.results emlearn_kmeans.results emlearn_arrayutils.results emlearn_cnn.results -dist: $(MODULES_PATH)/emltrees.mpy $(MODULES_PATH)/emlneighbors.mpy $(MODULES_PATH)/emliir.mpy $(MODULES_PATH)/eml_iir_q15.mpy $(MODULES_PATH)/emlfft.mpy $(MODULES_PATH)/emlkmeans.mpy $(MODULES_PATH)/tinymaix_cnn.mpy +dist: $(MODULES_PATH)/emlearn_trees.mpy $(MODULES_PATH)/emlearn_neighbors.mpy $(MODULES_PATH)/emlearn_iir.mpy $(MODULES_PATH)/emlearn_iir_q15.mpy $(MODULES_PATH)/emlearn_fft.mpy $(MODULES_PATH)/emlearn_kmeans.mpy $(MODULES_PATH)/emlearn_arrayutils.mpy $(MODULES_PATH)/emlearn_cnn.mpy diff --git a/README.md b/README.md index 13d7a4e..fa95479 100644 --- a/README.md +++ b/README.md @@ -28,12 +28,20 @@ It can be combined with feature preprocessing, including neural networks to addr - Fast Fourier Transform (FFT) for feature preprocessing, or general DSP - Infinite Impulse Response (IIR) filters for feature preprocessing, or general DSP - Clustering using K-means +- Scaling and data type transformations for `array`, using `emlearn_arrayutils`. - Load/save Numpy .npy files using [micropython-npyfile](https://github.com/jonnor/micropython-npyfile/) - Installable as a MicroPython native module. No rebuild/flashing needed +- Operates on standard `array.array` data structures - Models can be loaded at runtime from a file in disk/flash - Highly efficient. Inference times down to 100 microseconds, RAM usage <2 kB, FLASH usage <2 kB - Pre-built binaries available for most architectures. +## Examples + +- [xor_trees](./examples/xor_trees/). A "Hello World", using RandomForest. +- [mnist_cnn](./examples/mnist_cnn/). Basic image classification, using Convolutional Neural Network. +- [har_trees](./examples/har_trees/). Accelerometer-based Human Activity Recognition, using Random Forest +- [soundlevel_iir](./examples/soundlevel_iir/). Sound Level Meter, using Infinite Impulse Response (IIR) filters. ## Prerequisites @@ -49,10 +57,21 @@ Download the repository with examples etc git clone https://github.com/emlearn/emlearn-micropython ``` -## Installing from a release +## Usage + +Start with the instructions in [XOR example](./examples/xor_trees/). + #### Find architecture and .mpy version +The correct .mpy files to use depend on the CPU architecture of your microcontroller, +as well as the MicroPython version. + +| MicroPython version | .mpy version | +|---------------------| ------------- | +| 1.23.x | 6.3 | + + Identify which CPU architecture your device uses. You need to specify `ARCH` to install the correct module version. 
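+
+If in doubt, the MicroPython version and port of a connected device can be checked with `mpremote`
+(a sketch; `sys.implementation` and `sys.platform` are standard MicroPython, exact output varies by port):
+
+```
+mpremote exec "import sys; print(sys.implementation, sys.platform)"
+```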
@@ -71,43 +90,6 @@ Information is also available in the official documentation: [MicroPython: .mpy files](https://docs.micropython.org/en/latest/reference/mpyfiles.html#versioning-and-compatibility-of-mpy-files) -#### Download release files - -Download from [releases](https://github.com/emlearn/emlearn-micropython/releases). - -#### Install on device - -Copy the .mpy file for the correct `ARCH` to your device. -``` -mpremote cp emltrees.mpy :emltrees.mpy -mpremote cp emlneighbors.mpy :emlneighbors.mpy -``` - -NOTE: If there is no ready-made build for your device/architecture, -then you will need to build the .mpy module yourself. - -## Usage - -NOTE: Make sure to install the module first (see above) - -Train a model with scikit-learn -``` -pip install emlearn scikit-learn -python examples/xor_trees/xor_train.py -``` - -Copy model file to device - -``` -mpremote cp xor_model.csv :xor_model.csv -``` - -Run program that uses the model - -``` -mpremote run examples/xor_run.py -``` - ## Benchmarks #### UCI handwriting digits @@ -149,7 +131,7 @@ make dist ARCH=armv6m MPY_DIR=../micropython Install it on device ``` -mpremote cp dist/armv6m*/emltrees.mpy :emltrees.mpy +mpremote cp dist/armv6m*/emlearn_trees.mpy :emlearn_trees.mpy ``` #### Run tests diff --git a/benchmarks/digits_trees/digits_run.py b/benchmarks/digits_trees/digits_run.py index 74c337b..4b6898f 100644 --- a/benchmarks/digits_trees/digits_run.py +++ b/benchmarks/digits_trees/digits_run.py @@ -6,35 +6,15 @@ from everywhere_digits import RandomForestClassifier import m2c_digits -import emltrees - - -def load_model(builder, f): - - for line in f: - line = line.rstrip('\r') - line = line.rstrip('\n') - tok = line.split(',') - kind = tok[0] - if kind == 'r': - root = int(tok[1]) - emltrees.addroot(builder, root) - elif kind == 'n': - feature = int(tok[1]) - value = float(tok[2]) - left = int(tok[3]) - right = int(tok[4]) - emltrees.addnode(builder, left, right, feature, value) - else: - # unknown value - pass +import emlearn_trees + def emlearn_create(): - model = emltrees.open(7, 150) + model = emlearn_trees.new(10, 1000, 10) # Load a CSV file with the model with open('eml_digits.csv', 'r') as f: - load_model(model, f) + emlearn_trees.load_model(model, f) return model def argmax(l): @@ -61,8 +41,8 @@ def everywhere_run(data): def emlearn_run(data): errors = 0 for idx, x in enumerate(data): - f = array.array('f', x) # NOTE: this takes as long as predict - out = emltrees.predict(model, f) + f = x + out = model.predict(f) if (idx != out): errors += 1 return errors @@ -89,6 +69,7 @@ def none_run(data): def benchmark(): data = digits + data = [ array.array('h', f) for f in data ] print('model,errors,time_us') @@ -116,4 +97,7 @@ def benchmark(): m2c_duration = time.ticks_diff(after, before) print('m2cgen,{},{}'.format(m2c_errors, m2c_duration)) +if __name__ == '__main__': + benchmark() + diff --git a/benchmarks/digits_trees/digits_train.py b/benchmarks/digits_trees/digits_train.py index 572657d..920c819 100644 --- a/benchmarks/digits_trees/digits_train.py +++ b/benchmarks/digits_trees/digits_train.py @@ -42,22 +42,24 @@ def train(): X, y = load_digits(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - clf = RandomForestClassifier(n_estimators=7, max_leaf_nodes=20) + clf = RandomForestClassifier(n_estimators=10, max_leaf_nodes=100) clf.fit(X_train, y_train) print('Score: %.2f' % clf.score(X_test, y_test)) return clf +def main(): + clf = train() -clf = train() + out_dir = './' + if not 
os.path.exists(out_dir): + os.makedirs(out_dir) -out_dir = './' -if not os.path.exists(out_dir): - os.makedirs(out_dir) - -emlearn_export(clf, os.path.join(out_dir, 'eml_digits.csv')) -m2c_export(clf, os.path.join(out_dir, 'm2c_digits.py')) -everywhereml_export(clf, os.path.join(out_dir, 'everywhere_digits.py')) + emlearn_export(clf, os.path.join(out_dir, 'eml_digits.csv')) + m2c_export(clf, os.path.join(out_dir, 'm2c_digits.py')) + everywhereml_export(clf, os.path.join(out_dir, 'everywhere_digits.py')) +if __name__ == '__main__': + main() diff --git a/benchmarks/fft/fft_benchmark.py b/benchmarks/fft/fft_benchmark.py index fb281e4..75dd439 100644 --- a/benchmarks/fft/fft_benchmark.py +++ b/benchmarks/fft/fft_benchmark.py @@ -16,9 +16,9 @@ except ImportError as e: print(e) -emlfft = None +emlearn_fft = None try: - import emlfft + import emlearn_fft pass except ImportError as e: print(e) @@ -65,13 +65,13 @@ def run_one(real, imag, n, repeat=10): d = ((time.ticks_diff(time.ticks_us(), start)) / repeat) / 1000.0 # ms print('ulab', n, d) - # FIXME: this causes MicroPython to crash inside emlfft on ESP32 + # FIXME: this causes MicroPython to crash inside emlearn_fft on ESP32 #gc.collect() # emlearn if emlearn: - fft2 = emlfft.FFT(n) - emlfft.fill(fft2, n) + fft2 = emlearn_fft.FFT(n) + emlearn_fft.fill(fft2, n) gc.collect() start = time.ticks_us() diff --git a/benchmarks/fft/fft_benchmark.sh b/benchmarks/fft/fft_benchmark.sh index 4cd65a8..879e5c4 100644 --- a/benchmarks/fft/fft_benchmark.sh +++ b/benchmarks/fft/fft_benchmark.sh @@ -1,9 +1,9 @@ # NOTE: MicroPython must be flashed before-hand -# and emlfft.mpy built +# and emlearn_fft.mpy built MPREMOTE='mpremote' -${MPREMOTE} cp src/emlfft/emlfft.mpy : +${MPREMOTE} cp src/emlearn_fft/emlearn_fft.mpy : ${MPREMOTE} cp benchmarks/fft/fft_python.py : ${MPREMOTE} run benchmarks/fft/fft_benchmark.py diff --git a/benchmarks/iir/iir_benchmark.py b/benchmarks/iir/iir_benchmark.py index 575b04a..8048ccf 100644 --- a/benchmarks/iir/iir_benchmark.py +++ b/benchmarks/iir/iir_benchmark.py @@ -14,9 +14,9 @@ print(e) pass -emliir = None +emlearn_iir = None try: - import emliir + import emlearn_iir except ImportError as e: print(e) pass @@ -70,9 +70,9 @@ def main(): print('ulab', t) # emlearn - if emliir: + if emlearn_iir: start = time.ticks_us() - iir = emliir.new(coeff) + iir = emlearn_iir.new(coeff) for r in range(repeats): iir.run(a) t = (time.ticks_diff(time.ticks_us(), start) / repeats ) / 1000.0 # ms diff --git a/benchmarks/iir/iir_run.py b/benchmarks/iir/iir_run.py index 4c5e489..9c9e4a7 100644 --- a/benchmarks/iir/iir_run.py +++ b/benchmarks/iir/iir_run.py @@ -27,11 +27,11 @@ def iir_process_file(inp, out, filters, chunksize): if len(reader.shape) != 1: raise ValueError("Input must be 1d") if reader.typecode == 'f': - import emliir - filter = emliir.new(coefficients) + import emlearn_iir + filter = emlearn_iir.new(coefficients) elif reader.typecode == 'h': - import eml_iir_q15 - filter = eml_iir_q15.new(coefficients) + import emlearn_iir_q15 + filter = emlearn_iir_q15.new(coefficients) else: raise ValueError("Input must either be float32/f or int16/h") diff --git a/doc/TODO.md b/doc/TODO.md index 57f9b16..d32258a 100644 --- a/doc/TODO.md +++ b/doc/TODO.md @@ -3,7 +3,8 @@ # User journey - Level 0a. Run a pretrained example/demo in the browser -- Level 0b. Run a pretrained example/demo on a board +- Level 0b. Run a pretrained example/demo on PC/host +- Level 0c. Run a pretrained example/demo on a board - Level 1. Train custom model on-device - Level 2. 
Collect a dataset, do training on PC, deploy back to microcontroller
 - Level 3. Bake the custom model into the firmware
@@ -20,33 +21,21 @@ sequence.
 
 On-device training demo
 
-- Record piezo data with ADC. 100 Hz?
-Typical taps. Slower pushes. Handling noises.
-- Setup event detection for piezo.
-In its own module.
-Threshold on delta and level?
-- Create emliir module, use for piezo detection
+- Use accelerometer instead of piezo. On M5StickC, for example
+- Compute impulsive-ness feature. Magnitude, RMS, exponential smooth, then delta times level?
+- Alternative: Use IIR for knock detection
 - Maybe blink during unlocked state
 - Add a blink to each event. For user feedback
 - Make demo video
+- Add some documentation / README
 - Make state diagram
 - Make timing diagram. Highlight distances/features
-- Add some documentation / README
-
-Learnings.
-
-- Putting piezo on small thin plate worked well.
-On table not working, no response.
-Hitting direct not so good either, rise of finger causes change. Double-trigger. Also tricky to hit in right place.
-- LEDs as protection diodes worked well. Both red and green can be used. Lights up on direct hits, if placed by piezo.
-- Analog RC filter is beneficial for piezo connections. Using 10k+100nF, has 160 Hz cutoff. Should maybe move it to 80Hz? Since only sampling at 100 Hz.
-- Only direct hits can reach trigger levels on 3.3V I/O. Need ADC for other cases. But am seing some 100mV when placed on small plate
-Examples
+#### Examples
 
 - Add a novelty detection example?
 
-Benchmarks
+#### Benchmarks
 
 - Add FLASH and RAM usage
 - Test gzip compression of .csv model for trees
@@ -56,3 +45,5 @@
 In-browser demo
 
 - Test MicroPython build for WASM/browser
+- Test getting audio input into MicroPython Webassembly
+- Test getting IMU data (i.e. on a phone), in browser
diff --git a/doc/micropython-tinyml-status.md b/doc/micropython-tinyml-status.md
index 3ce7af6..06e5d42 100644
--- a/doc/micropython-tinyml-status.md
+++ b/doc/micropython-tinyml-status.md
@@ -1,9 +1,21 @@
-How ready is MicroPython for use in a TinyML setting.
+How ready is MicroPython for use in a "TinyML" setting,
+building applications that utilize Machine Learning models on embedded/microcontroller devices.
+Since these are typically sensor-centric, data-intensive, connected applications -
+the exposition below may also be useful in evaluating related cases.
+
+NOTE: This is a working document intended to inform development.
+Because of this, the selection of topics is inherently somewhat pessimistic:
+things that are lacking are more likely to appear than those that just work.
+
+Overall our belief is that MicroPython is the most promising
+development platform for TinyML application development,
+and that it has many of the key properties needed to be highly suitable for this purpose.
 
 ## Native modules
 
-Completely broken on at least one port, due to being garbage collected.
+FIXED. Completely broken on at least one port, due to module functions being garbage collected.
+https://github.com/micropython/micropython/issues/6769
 
 Using floating point / math operations is difficult / very tedious.
 Need to manually resolve all symbols.
 https://github.com/micropython/micropython/issues/5629
 
 ## Efficient data processing
 
-Good! Support for fixed buffers, using `array.array`.
+#### Using specialized code emitters
+
+Native machine code emitters with @micropython.native/viper.
+Good! Integer-only code can be made quite fast using this.
+
+Limitation.
Floating point code cannot be optimized with `@micropython.native`.
+
+#### Using array.array
 
-!Missing documentation about using array.array.
-With or without @micropython.native
+Support for fixed buffers, using `array.array`. OK!
+
+!Very little documentation about using array.array.
+What the benefits are, why to use it over lists etc, when it is critical.
+With or without `@micropython.native`.
 
 ? Not easy to write allocation free code in Python for arrays.
 Slicing notably makes a copy, whether it is for just reading or writing.
-Missing an equivalent of a memoryview for arrays?
-
-Native machine code emitters with @micropython.native/viper
-Good! Integer only code can be made quite fast with
-Limitation. Floating point code cannot be optimized with @micropython.native
+Missing an equivalent of a `memoryview` for arrays?
 
-Inefficient conversion from bytes to array.array?
+Inefficient conversion from bytes to `array.array`?
 ! Missing a "cast" type operation.
-When data comes an IMU/accelerometer over I2C/SPI it is just bytes.
+https://github.com/micropython/micropython/issues/4064
+Typical usecase: When data comes from an IMU/accelerometer over I2C/SPI it is stored as bytes.
 However these actually represent integer values, typically int16, stored in either little or big endian formats.
 
 TODO: benchmark the various options here.
-Plain buffer/array accesses vs struct.unpack / unpack_into
+Plain buffer/array accesses (with native/viper) vs `struct.unpack` / `struct.unpack_from`.
+A sketch of both options is included at the end of the *Library interoperability* section below.
 
-!Inefficient creation of array.array
-Must use a generator-based constructor - the only type that is standardized.
+!Inefficient creation of `array.array`.
+Must use a generator-based constructor - the only type that is supported now.
 This can take seconds for many items.
-Would want to initialize , or initialized filled with a particular value.
+Would want to initialize empty / with unspecified values, or initialize filled with a particular value (like 0).
+
+Inefficient copies of `array.array`.
+
+## Data formats
 
-Semi-related. Encoding base64 cannot be done without allocations. LINK issue
+#### Binary encoding into strings
 
 Useful for serializing binary data for sending in textual protocols.
 Be it USART, JSON based web API, etc.
 
-## Interoperability
+base64 support is in core library. OK.
+But encoding base64 cannot be done without allocations.
+https://github.com/micropython/micropython/issues/15513
+
+#### Numpy .npy files
+
+For storing multi-dimensional arrays. Basic compression support using ZIP.
+Efficient implementation available in [micropython-npyfile](https://github.com/jonnor/micropython-npyfile), including compression with [micropython-zipfile](https://github.com/jonnor/micropython-zipfile). Good!
+
+#### Audio files
+!No mip-installable library for *.wav* files.
+Just various example code lying around at various locations.
+Note that CPython defines the API for a wave file module, `wave`.
+Ideally would be compatible with that.
+
+?No ready-to-install libraries for OPUS/MP3 encoding/decoding.
+
+#### Image files
+
+!No mip-installable library for JPEG files.
+
+!No mip-installable library for PNG files.
+
+OpenMV [omv.image](https://docs.openmv.io/library/omv.image.html) module supports loading/saving JPEG/PNG.
+
+## Library interoperability
 
 No interoperable datastructure for multi-dimensional arrays.
+Many projects have invented their own, incompatible, representations.
 Ref notes on [multi_dimensional_arrays](multi_dimensional_arrays.md).
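+
+As a concrete illustration of the bytes-to-`array.array` conversion question raised under
+*Efficient data processing* above, a minimal sketch of the two candidate approaches
+(assumes little-endian int16 samples, as a typical IMU delivers them; not a benchmark):
+
+```python
+import array
+import struct
+
+raw = bytes([0x10, 0x00, 0xF0, 0xFF, 0x00, 0x08])  # 3 int16 samples, as read over I2C/SPI
+n = len(raw) // 2
+
+# Option 1: struct.unpack_from per value
+out1 = array.array('h', (0 for _ in range(n)))
+for i in range(n):
+    out1[i] = struct.unpack_from('<h', raw, 2*i)[0]
+
+# Option 2: plain byte accesses (a candidate for @micropython.native/viper)
+out2 = array.array('h', (0 for _ in range(n)))
+for i in range(n):
+    v = raw[2*i] | (raw[2*i + 1] << 8)
+    out2[i] = v - 0x10000 if v >= 0x8000 else v
+
+print(out1, out2)
+```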
 ## Drivers
 
+#### IMU/Accelerometer/gyro
 
 Almost no accelerometer/IMU drivers implement FIFO-based readout.
 Causes poor sampling accuracy / high jitter / limited samplerate / high power consumption.
 Can be changed by driver authors.
 [Discussion thread](https://github.com/orgs/micropython/discussions/15512).
 
-I2S is the only digital audio protocol with OK support.
+#### Microphone
+
+`I2S` is the only digital audio protocol with OK support.
 However, I2S microphones are the rarer variety; PDM is more common.
-PDM microphones are not supported, on any port?
+
+!PDM microphones are not supported, on any port?
+Bad, limits available ready-to-run hardware. ESP32, RP2040, STM32, NRF52.
+Microphone support also missing on `Unix` port.
+Useful to support ready-to-run examples on host PC, testing during development.
+And is relevant for deploying on Linux SBC such as Raspberry Pi.
+Could this be implemented as a .mpy native module?
+
+#### Camera
+No standard interface in upstream MicroPython.
+Open issue: https://github.com/micropython/micropython/issues/15753
+With proof-of-concept implementation for ESP32.
+
+OpenMV has a good module, with drivers for dozens of popular cameras.
+But it is a custom distribution of MicroPython.
+
 ## Connectivity
 
+#### HTTPS
 Good support for WiFi-based communication on ESP32.
+Ethernet-based support also seems workable on ESP32.
 
-Not good! Low-power BLE support.
+MQTT seems to be a bit chaotic.
+Multiple competing libraries.
+!?Not sure if any are really production-grade.
+
+#### Bluetooth Low Energy
+Basic BLE connectivity also seems OK on ESP32.
+
+Not good! *Low-power* BLE support.
 ESP32 no support for sleep with Bluetooth.
 https://github.com/micropython/micropython/pull/8318
@@ -70,36 +146,62 @@
 NRF52 BLE support not so good?
 https://github.com/micropython/micropython/issues/9851
 https://github.com/micropython/micropython/pull/8318
 
-LoRa?
-Zigbee?
-
 Missing! Support for Bluetooth Audio.
 Useful for collecting raw data by sending to a phone or computer.
 
-## Preprocessing
+#### LoRa
+Unknown/not investigated.
+
+#### Zigbee
+Unknown/not investigated.
+
+
+## Feature Engineering / Preprocessing
+
+Needed for many Machine Learning use-cases.
+
+### Filters
 
 FIR filters.
-numpy.convolve in ulab can probably be used?
+`numpy.convolve` in [ulab](https://github.com/v923z/micropython-ulab) can probably be used?
 
 IIR filters.
-sosfilt in ulab.
+`scipy.signal.sosfilt` available in [ulab](https://github.com/v923z/micropython-ulab).
+`emlearn_iir` available in [emlearn-micropython](https://github.com/emlearn/emlearn-micropython).
+
+### Fast Fourier Transform (FFT)
+Key part of computing a frequency spectrum, or time-frequency representations (spectrogram).
 
 FFT.
-Available in ulab.
+`numpy.fft.fft` available in [ulab](https://github.com/v923z/micropython-ulab).
+`emlearn_fft` available in [emlearn-micropython](https://github.com/emlearn/emlearn-micropython).
 
 DCT.
 Not available?
 
 ## Machine Learning inference
 
-Logistic regression
+### Linear models
+
+!Not available.
+Logistic regression.
+Support Vector Machine (especially linear+binary).
+
+### Tree-based ensembles (Random Forest / Decision Trees)
+
+`emlearn_trees` available in [emlearn-micropython](https://github.com/emlearn/emlearn-micropython).
+Can alternatively generate Python code with everywhereml/m2cgen.
+But over 10x slower than emlearn-micropython.
+
+### K-nearest-neighbours
-
-Support Vector Machine. Especially linear+binary
+`emlearn_neighbors` available in [emlearn-micropython](https://github.com/emlearn/emlearn-micropython).
-Tree-based ensembles
+### Convolutional Neural Network
-K-nearest-neighbours
+`emlearn_cnn` available in [emlearn-micropython](https://github.com/emlearn/emlearn-micropython).
-Convolutional Neural Network
+TensorFlow Lite support in [OpenMV](https://docs.openmv.io/library/omv.ml.html).
+But only as part of a custom MicroPython distribution.
diff --git a/examples/color_quantize_kmeans/color_quantize.py b/examples/color_quantize_kmeans/color_quantize.py
index 1389175..68344ad 100644
--- a/examples/color_quantize_kmeans/color_quantize.py
+++ b/examples/color_quantize_kmeans/color_quantize.py
@@ -9,7 +9,7 @@
 import os
 import gc
 
-import emlkmeans
+import emlearn_kmeans
 
 @micropython.native
 def quantize_image(img, quant, palette, rowstride):
@@ -55,7 +55,7 @@ def quantize_image_inner(img, quant, palette, rowstride : int, rows : int):
         #rgb = img[i:i+3]
 
         # find closest value in palette
-        palette_idx, distance = emlkmeans.euclidean_argmin(palette, rgb)
+        palette_idx, distance = emlearn_kmeans.euclidean_argmin(palette, rgb)
         #palette_idx, distance = 0, 0
 
         o = row_offset + col
@@ -103,7 +103,7 @@ def quantize_path(inp, outp, palette, n_samples=100):
 
     # Learn a palette
     start = time.ticks_us()
-    emlkmeans.cluster(samples, palette, features=3, max_iter=20)
+    emlearn_kmeans.cluster(samples, palette, features=3, max_iter=20)
     dur = (time.ticks_diff(time.ticks_us(), start) / 1000.0)
     print('cluster duration (ms)', dur)
diff --git a/examples/har_trees/.gitignore b/examples/har_trees/.gitignore
new file mode 100644
index 0000000..8fce603
--- /dev/null
+++ b/examples/har_trees/.gitignore
@@ -0,0 +1 @@
+data/
diff --git a/examples/har_trees/README.md b/examples/har_trees/README.md
new file mode 100644
index 0000000..e86787e
--- /dev/null
+++ b/examples/har_trees/README.md
@@ -0,0 +1,131 @@
+
+# Human Activity Recognition with tree-based models
+
+*Human Activity Recognition* (HAR) is the task of detecting what activities a human is performing based on motion.
+This type of activity recognition uses an Inertial Measurement Unit (IMU) carried on the person.
+The IMU has at least one accelerometer, gyroscope or magnetometer - or a combination of these.
+It can be a smartphone, smartwatch, fitness tracker, or more specialized device.
+The activities can for example be general daily activities, domestic activities, exercises, etc.
+
+The same kind of approach can also be applied to animals (*Animal Activity Recognition*),
+which enables tracking of pets, livestock and wild animals.
+This has been used for many kinds of animals - such as cats, dogs, dairy cattle.
+
+The same approach can be used for simple gesture recognition.
+
+## Status
+Working. Tested running on ESP32 with MicroPython 1.23.
+
+**NOTE:** This is primarily *example* code for Human Activity Recognition,
+not a generally-useful pretrained model.
+The dataset used is rather simple, and may not reflect the data you get from your device
+- which will lead to poor classifications.
+For real-world usage you should probably replace the dataset with your own data, collected on your own device.
+
+## Machine Learning pipeline
+
+This example uses an approach based on the paper
+[Are Microcontrollers Ready for Deep Learning-Based Human Activity Recognition?](https://www.mdpi.com/2079-9292/10/21/2640).
+For each time window, time-based statistical features are computed,
+and then classified with a RandomForest model.
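+
+A minimal sketch of this pipeline on-device, based on `har_live.py` in this directory
+(here `x`, `y`, `z` are placeholders for one window of int16 accelerometer data):
+
+```python
+import array
+import emlearn_trees
+import timebased
+
+model = emlearn_trees.new(10, 1000, 10)
+with open('har_uci_trees.csv', 'r') as f:
+    emlearn_trees.load_model(model, f)
+
+# Compute time-based features for one window, then classify it
+features = timebased.calculate_features_xyz((x, y, z))
+f = array.array(timebased.DATA_TYPECODE, (int(v) for v in features))
+print('class index:', model.predict(f))
+```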
+
+## Dataset
+The example uses the [UCI-HAR dataset](https://www.archive.ics.uci.edu/dataset/341/smartphone+based+recognition+of+human+activities+and+postural+transitions).
+The classes are by default limited to the three static postures (standing, sitting, lying) plus three dynamic activities (walking, walking downstairs, walking upstairs).
+The data is from a waist-mounted smartphone.
+Samplerate is 50 Hz.
+By default only the accelerometer data is used.
+
+
+## TODO
+
+- Add an illustrative image
+- Run the training + test/evaluation in CI
+- Add demonstration on LilyGo T-Watch 2020 (BMA423 accelerometer)
+
+
+## Running on host
+
+To run the example on your PC, use the Unix port of MicroPython.
+
+Make sure to have the Unix port of MicroPython 1.23 set up.
+On Windows you can use Windows Subsystem for Linux (WSL), or Docker.
+
+Download the files in this example directory
+```
+git clone https://github.com/emlearn/emlearn-micropython.git
+cd emlearn-micropython/examples/har_trees
+```
+
+Install the dependencies
+```console
+micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_trees.mpy
+```
+
+Run the example
+```console
+micropython har_run.py
+```
+
+## Running on device (Viper IDE)
+
+Make sure to have MicroPython 1.23 installed on device.
+
+The fastest and easiest way to install on your device is to use Viper IDE.
+This will install the library and the example code:
+[Run using ViperIDE](https://viper-ide.org/?install=github:emlearn/emlearn-micropython/examples/har_trees)
+
+## Run on device
+
+Make sure to have MicroPython 1.23 installed on device.
+
+Install the dependencies
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_trees.mpy
+mpremote mip install github:jonnor/micropython-npyfile
+mpremote mip install github:jonnor/micropython-zipfile
+```
+
+Copy example code
+```
+mpremote cp har_uci_trees.csv har_uci.testdata.npz timebased.py :
+```
+
+Run model evaluation on a test set
+```
+mpremote run har_run.py
+```
+
+## Run on device (with live accelerometer data)
+
+FIXME: document
+
+Requires an M5StickC PLUS 2, using an MPU6886 accelerometer connected via I2C.
+
+`har_live.py`
+
+## Run training
+
+This will train a new model.
+Uses CPython on the PC.
+
+Install requirements
+```
+pip install -r requirements.txt
+```
+
+Download training data (4 GB space)
+```
+python -m leaf_clustering.data.har.uci
+```
+
+Run training process
+```
+python har_train.py
+```
+
+This will output a new model (named `{dataset}_trees.csv`, e.g. `uci_har_trees.csv`).
+It can then be deployed to the device following the steps above.
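+
+Copy the trained model to the device, under the filename that `har_run.py` expects
+(a sketch; adjust names if you trained on a different dataset):
+
+```
+mpremote cp uci_har_trees.csv :har_uci_trees.csv
+```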
+
+
diff --git a/examples/har_trees/compute_features.py b/examples/har_trees/compute_features.py
new file mode 100644
index 0000000..125ab92
--- /dev/null
+++ b/examples/har_trees/compute_features.py
@@ -0,0 +1,98 @@
+
+import sys
+import array
+import time
+
+import npyfile
+
+from timebased import calculate_features_xyz, DATA_TYPECODE
+
+def compute_dataset_features(data: npyfile.Reader,
+        skip_samples=0, limit_samples=None, verbose=0):
+
+    # Check that data is in the expected format
+    shape = data.shape
+    assert len(shape) == 3, shape
+    n_samples, window_length, n_axes = shape
+    assert n_axes == 3, shape
+    assert window_length == 128, shape
+
+    # We expect data to be h/int16
+    assert data.typecode == DATA_TYPECODE, data.typecode
+    assert data.itemsize == 2, data.itemsize
+
+    # pre-allocate values
+    x_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
+    y_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
+    z_values = array.array(DATA_TYPECODE, (0 for _ in range(window_length)))
+
+    chunk_size = window_length*n_axes
+    sample_counter = 0
+
+    data_chunks = data.read_data_chunks(chunk_size, offset=chunk_size*skip_samples)
+    for arr in data_chunks:
+
+        # process the data
+        # De-interleave data from XYZ1 XYZ2... into separate continuous X,Y,Z
+        for i in range(window_length):
+            x_values[i] = arr[(i*3)+0]
+            y_values[i] = arr[(i*3)+1]
+            z_values[i] = arr[(i*3)+2]
+
+        #print(x_values)
+        #print(y_values)
+        #print(z_values)
+
+        assert len(x_values) == window_length
+        assert len(y_values) == window_length
+        assert len(z_values) == window_length
+
+        feature_calc_start = time.ticks_ms()
+        features = calculate_features_xyz((x_values, y_values, z_values))
+        duration = time.ticks_diff(time.ticks_ms(), feature_calc_start)
+        if verbose > 2:
+            print('feature-calc-end', duration)
+
+        yield features
+
+        sample_counter += 1
+        if limit_samples is not None and sample_counter > limit_samples:
+            break
+
+def main():
+
+    if len(sys.argv) != 3:
+        print('Usage: compute_features.py IN.npy OUT.npy')
+        return
+
+    _, in_path, out_path = sys.argv
+
+    skip_samples = 0
+    limit_samples = None
+
+    out_typecode = 'f'
+    n_features = 92
+
+    features_array = array.array(out_typecode, (0 for _ in range(n_features)))
+
+    with npyfile.Reader(in_path) as data:
+        n_samples, window_length, n_axes = data.shape
+
+        out_shape = (n_samples, n_features)
+        with npyfile.Writer(out_path, out_shape, out_typecode) as out:
+
+            generator = compute_dataset_features(data,
+                skip_samples=skip_samples,
+                limit_samples=limit_samples,
+            )
+            for features in generator:
+                #print('features', len(features), features)
+                assert len(features) == n_features, (len(features), n_features)
+
+                for i, f in enumerate(features):
+                    features_array[i] = f
+
+                out.write_values(features_array)
+
+if __name__ == '__main__':
+    main()
+
diff --git a/examples/har_trees/har_live.py b/examples/har_trees/har_live.py
new file mode 100644
index 0000000..3d8ab5d
--- /dev/null
+++ b/examples/har_trees/har_live.py
@@ -0,0 +1,74 @@
+
+from machine import Pin, I2C
+from mpu6886 import MPU6886
+
+import time
+import struct
+import array
+
+from windower import TriaxialWindower, empty_array
+import timebased
+import emlearn_trees
+
+def mean(arr):
+    m = sum(arr) / float(len(arr))
+    return m
+
+def main():
+
+    classname_index = {"LAYING": 0, "SITTING": 1, "STANDING": 2, "WALKING": 3, "WALKING_DOWNSTAIRS": 4, "WALKING_UPSTAIRS": 5}
+    class_index_to_name = { v: k for k, v in classname_index.items() }
+
+    # Load a CSV file with the model
+    model = emlearn_trees.new(10, 1000, 10)
+    with open('har_uci_trees.csv', 'r') as f:
+        emlearn_trees.load_model(model, f)
+
+    i2c = I2C(sda=Pin(21), scl=Pin(22), freq=100000)
+    mpu = MPU6886(i2c)
+
+    # Enable FIFO at a fixed samplerate
+    mpu.fifo_enable(True)
+    mpu.set_odr(100)
+
+    window_length = 100
+    hop_length = 50
+    chunk = bytearray(mpu.bytes_per_sample*hop_length)
+
+    x_values = empty_array('h', hop_length)
+    y_values = empty_array('h', hop_length)
+    z_values = empty_array('h', hop_length)
+    windower = TriaxialWindower(window_length)
+
+    features_typecode = timebased.DATA_TYPECODE
+    n_features = timebased.N_FEATURES
+    features = array.array(features_typecode, (0 for _ in range(n_features)))
+
+    while True:
+
+        count = mpu.get_fifo_count()
+        if count >= hop_length:
+            start = time.ticks_ms()
+            # read data
+            mpu.read_samples_into(chunk)
+            mpu.deinterleave_samples(chunk, x_values, y_values, z_values)
+            windower.push(x_values, y_values, z_values)
+            if windower.full():
+                # compute features
+                #print('xyz', mean(x_values), mean(y_values), mean(z_values))
+                ff = timebased.calculate_features_xyz((x_values, y_values, z_values))
+                for i, f in enumerate(ff):
+                    features[i] = int(f)
+
+                # Run classifier
+                result = model.predict(features)
+                activity = class_index_to_name[result]
+
+                d = time.ticks_diff(time.ticks_ms(), start)
+                print('class', activity, d)
+
+        time.sleep_ms(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/har_trees/har_run.py b/examples/har_trees/har_run.py
new file mode 100644
index 0000000..11c3d8a
--- /dev/null
+++ b/examples/har_trees/har_run.py
@@ -0,0 +1,79 @@
+
+import array
+
+import zipfile
+import npyfile
+import emlearn_trees
+
+def har_load_test_data(path,
+        skip_samples=0, limit_samples=None):
+
+    n_features = 92
+
+    with zipfile.ZipFile(path) as archive:
+        ext = '.npy'
+        files = { f.rstrip(ext): f for f in archive.namelist() if f.endswith(ext) }
+        #print('archive-files', files.keys())
+
+        X_file = archive.open('X.npy')
+        Y_file = archive.open('Y.npy')
+
+        with npyfile.Reader(Y_file) as labels:
+            assert len(labels.shape) == 1
+
+            with npyfile.Reader(X_file) as data:
+                # Check that data is expected format: sample x features, int16
+                shape = data.shape
+                assert len(shape) == 2, shape
+                assert shape[1] == n_features, shape
+                assert data.itemsize == 2
+                assert data.typecode == 'h'
+
+                # Number of labels should match number of data items
+                assert data.shape[0] == labels.shape[0]
+
+                # Read data and label for one sample at a time
+                data_chunk = n_features
+                sample_count = 0
+
+                label_chunks = labels.read_data_chunks(1, offset=1*skip_samples)
+                data_chunks = data.read_data_chunks(data_chunk, offset=data_chunk*skip_samples)
+
+                for l_arr, arr in zip(label_chunks, data_chunks):
+
+                    yield arr, l_arr
+
+                    sample_count += 1
+                    if limit_samples is not None and sample_count > limit_samples:
+                        break
+
+
+
+def main():
+
+    model = emlearn_trees.new(10, 1000, 10)
+
+    # Load a CSV file with the model
+    with open('har_uci_trees.csv', 'r') as f:
+        emlearn_trees.load_model(model, f)
+
+
+    errors = 0
+    total = 0
+    data_path = 'har_uci.testdata.npz'
+    for features, labels in har_load_test_data(data_path):
+
+        assert len(labels) == 1
+        label = labels[0]
+        result = model.predict(features)
+        if result != label:
+            errors += 1
+        total += 1
+
+        #print(result, label)
+
+    acc = 1.0 - (errors/float(total))
+    print(f'har-run-result accuracy={acc*100:.1f}%')
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/har_trees/har_train.py b/examples/har_trees/har_train.py
new file mode 100644
index 0000000..ab5ca87
--- 
/dev/null +++ b/examples/har_trees/har_train.py @@ -0,0 +1,409 @@ + +import os +import time +import uuid +import pickle +import tempfile +import subprocess +import json + +import pandas +import numpy +import structlog + +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import f1_score, make_scorer +from sklearn.model_selection import GridSearchCV, GroupShuffleSplit + +import emlearn +from emlearn.preprocessing.quantizer import Quantizer +from sklearn.preprocessing import LabelEncoder + +log = structlog.get_logger() + + +def evaluate(windows : pandas.DataFrame, groupby, hyperparameters, + random_state=1, n_splits=5, label_column='activity'): + + # Setup subject-based cross validation + splitter = GroupShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=random_state) + + clf = RandomForestClassifier(random_state = random_state, n_jobs=1, class_weight = "balanced") + + f1_micro = 'f1_micro' + + search = GridSearchCV( + clf, + param_grid=hyperparameters, + scoring={ + # our predictive model metric + 'f1_micro': f1_micro, + }, + refit='f1_micro', + cv=splitter, + return_train_score=True, + n_jobs=4, + verbose=2, + ) + + feature_columns = sorted(set(windows.columns) - set([label_column])) + assert 'subject' not in feature_columns + assert 'activity' not in feature_columns + X = windows[feature_columns] + Y = windows[label_column] + groups = windows.index.get_level_values(groupby) + search.fit(X, Y, groups=groups) + + results = pandas.DataFrame(search.cv_results_) + estimator = search.best_estimator_ + + return results, estimator + + +def extract_windows(sensordata : pandas.DataFrame, + window_length : int, + window_hop : int, + groupby : list[str], + ): + + groups = sensordata.groupby(groupby, observed=True) + + + for group_idx, group_df in groups: + + windows = [] + + # make sure order is correct + group_df = group_df.reset_index().set_index('time').sort_index() + + # create windows + win_start = 0 + length = len(group_df) + while win_start < length: + win_end = win_start + window_length + # ignore partial window at the end + if win_end > length: + break + + win = group_df.iloc[win_start:win_end].copy() + win['window'] = win.index[0] + assert len(win) == window_length, (len(win), window_length) + + windows.append(win) + + win_start += window_hop + + yield windows + +def assign_window_label(labels, majority=0.66): + """ + Assign the most common label to window, if it is above the @majority threshold + Otherwise return NA + """ + counts = labels.value_counts(dropna=True) + threshold = majority * len(labels) + if counts.iloc[0] > threshold: + return counts.index[0] + else: + return None + + +def timebased_features(windows : list[pandas.DataFrame], + columns : list[str], + micropython_bin='micropython') -> pandas.DataFrame: + + #print('w', len(windows), columns) + + here = os.path.dirname(__file__) + feature_extraction_script = os.path.join(here, 'compute_features.py') + + # XXX: this scaling should go elsewhere + data = numpy.stack([ d[columns] for d in windows ]) + data = ((data / 4.0) * (2**15-1)).astype(numpy.int16) + + #log.debug('data-range', + # upper=numpy.quantile(data, 0.99), + # lower=numpy.quantile(data, 0.01), + #) + + with tempfile.TemporaryDirectory() as tempdir: + data_path = os.path.join(tempdir, 'data.npy') + features_path = os.path.join(tempdir, 'features.npy') + + # Persist output + numpy.save(data_path, data) + + # Run MicroPython program + args = [ + micropython_bin, + feature_extraction_script, + data_path, + features_path, + ] + cmd = ' 
'.join(args) + #log.debug('run-micropython', cmd=cmd) + try: + out = subprocess.check_output(args) + except subprocess.CalledProcessError as e: + log.error('micropython-error', out=e.stdout, err=e.stderr) + raise e + + # Load output + out = numpy.load(features_path) + assert len(out) == len(data) + + # TODO: add feature names + df = pandas.DataFrame(out) + + return df + +def extract_features(sensordata : pandas.DataFrame, + columns : list[str], + groupby, + window_length = 128, + window_hop = 64, + features='timebased', + quant_div = 4, + quant_depth = 6, + label_column='activity', + ) -> pandas.DataFrame: + """ + Convert sensor data into fixed-sized time windows and extact features + """ + + if features == 'quant': + raise NotImplementedError + elif features == 'timebased': + feature_extractor = lambda w: timebased_features(w, columns=columns) + else: + raise ValueError(f"Unsupported features: {features}") + + # Split into fixed-length windows + features_values = [] + generator = extract_windows(sensordata, window_length, window_hop, groupby=groupby) + for windows in generator: + + # drop invalid data + windows = [ w for w in windows if not w[columns].isnull().values.any() ] + + # Extract features + df = feature_extractor(windows) + + # Convert features to 16-bit integers + quant = Quantizer().fit_transform(df.values) + df.loc[:,:] = quant + + # Attach labels + df[label_column] = [ assign_window_label(w[label_column]) for w in windows ] + + # Combine with identifying information + index_columns = list(groupby + ['window']) + for idx_column in index_columns: + df[idx_column] = [w[idx_column].iloc[0] for w in windows] + df = df.set_index(index_columns) + + features_values.append(df) + + out = pandas.concat(features_values) + return out + +def export_model(path, out): + + with open(path, "rb") as f: + classifier = pickle.load(f) + + classes = classifier.classes_ + class_mapping = dict(zip(classes, range(len(classifier.classes_)))) + + cmodel = emlearn.convert(classifier) + cmodel.save(name='harmodel', format='csv', file=out) + + +def run_pipeline(run, hyperparameters, dataset, + data_dir, + out_dir, + n_splits=5, + features='timebased', + ): + + dataset_config = { + 'uci_har': dict( + groups=['subject', 'experiment'], + data_columns = ['acc_x', 'acc_y', 'acc_z'], + classes = [ + #'STAND_TO_LIE', + #'SIT_TO_LIE', + #'LIE_TO_SIT', + #'STAND_TO_SIT', + #'LIE_TO_STAND', + #'SIT_TO_STAND', + 'STANDING', 'LAYING', 'SITTING', + 'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', + ], + ), + 'pamap2': dict( + groups=['subject'], + data_columns = ['hand_acceleration_16g_x', 'hand_acceleration_16g_y', 'hand_acceleration_16g_z'], + classes = [ + #'transient', + 'walking', 'ironing', 'lying', 'standing', + 'Nordic_walking', 'sitting', 'vacuum_cleaning', + 'cycling', 'ascending_stairs', 'descending_stairs', + 'running', 'rope_jumping', + ], + ), + } + + if not os.path.exists(out_dir): + os.makedirs(out_dir) + + data_path = os.path.join(data_dir, f'{dataset}.parquet') + + data_load_start = time.time() + data = pandas.read_parquet(data_path) + + #print(data.index.names) + #print(data.columns) + + groups = dataset_config[dataset]['groups'] + data_columns = dataset_config[dataset]['data_columns'] + enabled_classes = dataset_config[dataset]['classes'] + + data_load_duration = time.time() - data_load_start + log.info('data-loaded', dataset=dataset, samples=len(data), duration=data_load_duration) + + feature_extraction_start = time.time() + features = extract_features(data, columns=data_columns, 
groupby=groups, features=features) + labeled = numpy.count_nonzero(features['activity'].notna()) + + feature_extraction_duration = time.time() - feature_extraction_start + log.info('feature-extraction-done', + dataset=dataset, + total_instances=len(features), + labeled_instances=labeled, + duration=feature_extraction_duration, + ) + + # Drop windows without labels + features = features[features.activity.notna()] + + # Keep only windows with enabled classes + features = features[features.activity.isin(enabled_classes)] + + print('Class distribution\n', features['activity'].value_counts(dropna=False)) + + # Run train-evaluate + results, estimator = evaluate(features, + hyperparameters=hyperparameters, + groupby='subject', + n_splits=n_splits, + ) + + # Save a model + estimator_path = os.path.join(out_dir, f'{dataset}.estimator.pickle') + with open(estimator_path, 'wb') as f: + pickle.dump(estimator, file=f) + + # Export model with emlearn + model_path = os.path.join(out_dir, f'{dataset}_trees.csv') + export_model(estimator_path, model_path) + + # Save testdata + label_column = 'activity' + classes = estimator.classes_ + class_mapping = dict(zip(classes, range(len(classes)))) + meta_path = os.path.join(out_dir, f'{dataset}.meta.json') + with open(meta_path, 'w') as f: + f.write(json.dumps(class_mapping)) + + testdata_path = os.path.join(out_dir, f'{dataset}.testdata.npz') + testdata = features.groupby(label_column, as_index=False).sample(n=10) + # convert to class number/index + testdata['class'] = testdata[label_column].map(class_mapping) + feature_columns = sorted(set(testdata.columns) - set([label_column, 'class'])) + numpy.savez(testdata_path, + X=numpy.ascontiguousarray(testdata[feature_columns].astype(numpy.int16)), + Y=numpy.ascontiguousarray(testdata['class'].astype(numpy.int8)), + ) + + # Save results + results['dataset'] = dataset + results['run'] = run + results_path = os.path.join(out_dir, f'{dataset}.results.parquet') + results.to_parquet(results_path) + print('Results written to', results_path) + + return results + +def autoparse_number(s): + if '.' 
in s: + return float(s) + else: + return int(s) + +def config_number_list(var : str, default : str, delim=',') -> list[int]: + + s = os.environ.get(var, default) + tok = s.split(delim) + values = [ autoparse_number(v.strip()) for v in tok if v.strip() ] + + return values + +def parse(): + import argparse + parser = argparse.ArgumentParser(description='') + + parser.add_argument('--dataset', type=str, default='uci_har', + help='Which dataset to use') + parser.add_argument('--data-dir', metavar='DIRECTORY', type=str, default='./data/processed', + help='Where the input data is stored') + parser.add_argument('--out-dir', metavar='DIRECTORY', type=str, default='./', + help='Where to store results') + + parser.add_argument('--features', type=str, default='timebased', + help='Which feature-set to use') + + args = parser.parse_args() + + return args + + +def main(): + + args = parse() + dataset = args.dataset + out_dir = args.out_dir + data_dir = args.data_dir + + run_id = uuid.uuid4().hex.upper()[0:6] + + min_samples_leaf = config_number_list('MIN_SAMPLES_LEAF', '1,4,16,64,256') + trees = config_number_list('TREES', '10') + + hyperparameters = { + "max_features": [ 0.30 ], + 'n_estimators': trees, + 'min_samples_leaf': min_samples_leaf, + } + + results = run_pipeline(dataset=args.dataset, + out_dir=args.out_dir, + data_dir=args.data_dir, + run=run_id, + hyperparameters=hyperparameters, + n_splits=int(os.environ.get('FOLDS', '5')), + features=args.features, + ) + + df = results.rename(columns=lambda c: c.replace('param_', '')) + display_columns = [ + 'n_estimators', + 'min_samples_leaf', + 'mean_train_f1_micro', + 'mean_test_f1_micro', + ] + + print('Results\n', df[display_columns]) + +if __name__ == '__main__': + main() diff --git a/examples/har_trees/har_uci.testdata.npz b/examples/har_trees/har_uci.testdata.npz new file mode 100644 index 0000000..8d417a5 Binary files /dev/null and b/examples/har_trees/har_uci.testdata.npz differ diff --git a/examples/har_trees/har_uci_trees.csv b/examples/har_trees/har_uci_trees.csv new file mode 100644 index 0000000..2d7b709 --- /dev/null +++ b/examples/har_trees/har_uci_trees.csv @@ -0,0 +1,293 @@ +f,92 +c,6 +l,0 +l,1 +l,2 +l,5 +l,3 +l,4 +r,0 +r,57 +r,114 +r,170 +r,226 +n,1,529.5,-1,1 +n,80,50.5,1,22 +n,13,-75.5,1,11 +n,20,257.0,1,-2 +n,13,-144.5,1,7 +n,8,13065.0,1,4 +n,89,2611.0,1,-3 +n,13,-320.5,1,-3 +n,5,1124.0,-3,-3 +n,20,19.5,-3,1 +n,18,1963.0,-3,-3 +n,24,-63.0,-3,1 +n,30,1346.0,-3,-3 +n,36,16575.5,1,9 +n,86,4552.0,1,7 +n,13,9.5,1,3 +n,20,-46.0,-2,1 +n,22,135.5,-3,-2 +n,25,-78.0,-2,1 +n,24,77.5,1,-2 +n,12,63.5,-2,-2 +n,25,47.5,-2,-3 +n,7,909.5,-2,-2 +n,1,1928.5,1,25 +n,0,1106.5,1,9 +n,74,930.0,1,5 +n,74,763.0,1,3 +n,4,903.5,1,-4 +n,44,29450.5,-4,-4 +n,50,-26.5,-4,-4 +n,4,810.5,-4,1 +n,82,2665.5,1,-5 +n,32,2836.5,-5,-4 +n,13,-207.5,1,8 +n,67,155.5,1,2 +n,27,156.5,-4,-4 +n,57,230.5,-4,1 +n,13,-310.5,-5,1 +n,1,1839.5,1,-5 +n,62,66.5,-5,1 +n,14,-403.5,-5,-5 +n,63,64.5,-4,1 +n,1,1875.5,1,-5 +n,61,27.5,-5,1 +n,50,-28.5,1,2 +n,61,39.5,-5,-5 +n,10,-92.5,1,-5 +n,24,-78.0,-5,-5 +n,67,198.5,1,2 +n,21,252.5,-6,-4 +n,33,1467.5,1,5 +n,3,1117.5,1,-5 +n,50,-50.5,1,2 +n,8,13308.0,-6,-6 +n,21,204.5,-6,-6 +n,32,3456.0,1,-6 +n,15,-158.0,-4,-6 +n,3,439.5,-1,1 +n,77,-228.5,1,35 +n,6,540.5,1,30 +n,15,-155.5,1,10 +n,73,64.5,1,4 +n,4,975.5,1,-4 +n,73,58.5,1,-4 +n,89,2474.5,-4,-4 +n,8,12700.5,1,3 +n,17,475.5,1,-4 +n,53,150.5,-4,-4 +n,67,350.5,1,-6 +n,0,1117.5,-4,-5 +n,33,1363.5,1,4 +n,1,1900.5,-5,1 +n,32,2775.5,-6,1 +n,36,17531.5,-6,-6 +n,1,1918.0,1,13 +n,63,75.5,1,3 
+n,18,2326.5,-4,1 +n,26,197.5,-4,-4 +n,23,-344.5,-4,1 +n,73,70.5,1,4 +n,32,2613.5,1,-4 +n,32,2380.0,1,-5 +n,20,92.5,-5,-5 +n,1,1758.5,1,2 +n,63,97.5,-5,-5 +n,16,220.5,-5,1 +n,21,338.5,-5,-5 +n,67,275.5,-4,1 +n,20,-82.0,-6,-5 +n,1,2005.5,-4,1 +n,33,1475.5,1,-6 +n,50,-56.5,1,-6 +n,20,-209.5,-6,-6 +n,14,-78.5,1,11 +n,20,202.5,1,-2 +n,15,-142.5,1,7 +n,4,1154.5,1,4 +n,14,-325.5,1,2 +n,35,1606.5,-3,-3 +n,90,29191.0,-3,-3 +n,15,-203.5,-3,1 +n,27,76.5,-3,-3 +n,23,-32.5,-3,1 +n,18,1284.5,-3,-3 +n,31,1420.5,1,8 +n,11,112.5,1,5 +n,36,14756.5,1,2 +n,18,247.0,-2,-2 +n,77,-30.5,-3,1 +n,33,1377.5,-2,-2 +n,85,-4542.5,-2,1 +n,74,107.5,-2,-2 +n,31,1459.5,-2,1 +n,12,125.5,-2,-2 +n,3,435.5,-1,1 +n,72,11.5,1,21 +n,14,-78.5,1,11 +n,21,288.5,1,-2 +n,18,1582.0,1,3 +n,20,-23.5,-2,1 +n,35,1334.0,-3,-3 +n,10,-166.5,1,5 +n,23,-51.5,1,3 +n,25,-145.5,-3,1 +n,25,-75.5,-3,-3 +n,8,13140.5,-3,-3 +n,20,74.5,-3,-3 +n,31,1430.5,1,8 +n,10,3.5,1,3 +n,24,-39.0,-2,1 +n,1,1188.5,-2,-3 +n,25,-86.5,-3,1 +n,67,64.5,1,2 +n,7,1155.5,-2,-2 +n,7,1158.5,-2,-2 +n,21,161.0,-2,-2 +n,1,1985.5,1,26 +n,64,893.5,1,7 +n,18,2519.0,1,2 +n,64,692.0,-4,-5 +n,2,715.5,1,2 +n,64,843.5,-4,-4 +n,74,738.0,1,-4 +n,14,-324.5,-4,-4 +n,13,-303.5,1,5 +n,81,241.5,1,-5 +n,34,514.0,-4,1 +n,67,288.5,1,-4 +n,4,890.0,-4,-4 +n,23,-367.5,-4,1 +n,1,1837.5,1,9 +n,67,152.0,-5,1 +n,48,-312.5,1,-5 +n,22,-779.0,-5,1 +n,74,874.5,1,3 +n,32,2560.5,1,-5 +n,5,1304.5,-5,-5 +n,22,-624.0,-5,1 +n,57,291.5,-5,-5 +n,74,825.0,-4,1 +n,32,2666.5,-5,1 +n,33,1451.5,-6,-5 +n,33,1433.5,1,5 +n,3,1091.5,1,-6 +n,74,751.0,-6,1 +n,50,-45.5,1,-6 +n,87,96.5,-6,-6 +n,73,75.5,-4,1 +n,3,1077.5,1,-5 +n,7,1175.5,-6,-6 +n,80,50.5,1,23 +n,4,434.0,-1,1 +n,13,-77.5,1,11 +n,22,231.5,1,-2 +n,13,-165.5,1,6 +n,20,-53.5,1,3 +n,74,163.0,-3,1 +n,34,25.5,-3,-3 +n,6,13.5,1,-3 +n,54,1166.5,-3,-3 +n,22,-130.5,-3,1 +n,24,25.0,-3,1 +n,22,76.5,-3,-3 +n,15,102.5,1,8 +n,12,-85.5,-3,1 +n,28,2997.0,1,-2 +n,36,14756.5,1,3 +n,17,23.5,-2,1 +n,28,703.5,-2,-2 +n,28,1459.0,-3,1 +n,43,2657.5,-2,-2 +n,5,1147.5,-2,1 +n,20,93.5,-2,-2 +n,1,2041.5,1,27 +n,67,222.5,1,11 +n,15,-81.5,1,8 +n,4,921.5,1,4 +n,82,2414.0,1,-4 +n,74,845.0,1,-4 +n,35,1660.0,-4,-4 +n,68,-257.0,-5,1 +n,64,1014.0,1,-4 +n,18,3323.5,-4,-4 +n,35,1574.5,1,-4 +n,64,1064.0,-5,-5 +n,3,1070.5,1,7 +n,33,1453.5,1,2 +n,58,-342.5,-6,-6 +n,10,-317.5,1,2 +n,25,-170.5,-4,-4 +n,1,1799.5,1,-6 +n,11,119.5,-5,-4 +n,1,1909.5,1,7 +n,74,673.5,-5,1 +n,33,1582.0,1,-5 +n,25,-62.5,-5,1 +n,10,-107.0,1,-5 +n,3,1108.5,-5,1 +n,42,4244.0,-5,-5 +n,79,0.5,-6,-5 +n,51,77.5,1,4 +n,50,-45.5,1,-6 +n,67,244.5,-6,1 +n,48,-685.5,-6,-6 +n,52,150.5,-4,1 +n,20,-25.5,-6,-6 +n,80,50.5,1,23 +n,8,5088.5,-1,1 +n,14,-73.5,1,11 +n,18,1954.5,1,6 +n,22,-114.5,1,2 +n,21,-163.5,-3,-2 +n,36,16393.0,1,-3 +n,3,1169.5,1,-3 +n,1,1170.5,-3,-3 +n,2,993.5,-3,1 +n,24,-80.5,1,2 +n,25,-143.5,-3,-3 +n,18,2095.0,-3,-3 +n,13,104.5,1,7 +n,27,268.5,1,-2 +n,85,-4578.5,-3,1 +n,38,-249.5,-2,1 +n,27,80.5,-2,1 +n,77,-30.5,-2,1 +n,31,1339.5,-2,-2 +n,3,1139.5,1,2 +n,86,4328.5,-2,-2 +n,2,1140.5,-2,-2 +n,1,2041.5,1,25 +n,15,-155.5,1,8 +n,74,764.5,1,3 +n,2,728.5,1,-4 +n,4,917.5,-4,-4 +n,8,12698.5,1,3 +n,18,5438.5,1,-4 +n,2,611.5,-4,-4 +n,4,912.0,-4,-5 +n,5,1266.5,1,6 +n,67,201.5,1,2 +n,2,733.5,-4,-4 +n,20,-262.0,1,2 +n,31,993.5,-4,-4 +n,1,1743.5,-5,-6 +n,32,2611.5,1,5 +n,74,799.0,1,3 +n,64,988.5,-4,1 +n,34,498.5,-5,-5 +n,4,920.5,-5,-5 +n,58,-416.5,1,3 +n,64,1445.5,-5,1 +n,1,1866.5,-5,-5 +n,67,216.5,-4,1 +n,70,-15.5,-6,-5 +n,79,3.5,1,-6 +n,13,-241.5,1,2 +n,21,241.5,-6,-6 +n,48,-799.5,-6,1 +n,74,771.5,-6,1 +n,71,32.5,-6,-6 \ No newline at end of 
file
diff --git a/examples/har_trees/package.json b/examples/har_trees/package.json
new file mode 100644
index 0000000..1983039
--- /dev/null
+++ b/examples/har_trees/package.json
@@ -0,0 +1,17 @@
+{
+    "name": "emlearn-micropython Human Activity Recognition example",
+    "version": "0.3.0",
+    "description": "Classify accelerometer activities using Random Forest",
+    "author": "Jon Nordby",
+    "license": "MIT",
+    "readme": "github:emlearn/emlearn-micropython/blob/master/examples/har_trees/README.md",
+    "keywords": "humanactivityrecognition,accelerometer,IMU,machinelearning,randomforest,classification,decisiontree",
+    "urls": [
+        ["fs:har_run.py", "har_run.py"],
+        ["fs:har_live.py", "har_live.py"],
+        ["fs:timebased.py", "timebased.py"],
+        ["fs:har_uci_trees.csv", "har_uci_trees.csv"],
+        ["fs:har_uci.testdata.npz", "har_uci.testdata.npz"]
+    ],
+    "deps": [["emlearn", "master"], ["npyfile", "master"], ["zipfile", "master"]]
+}
diff --git a/examples/har_trees/pamap2_windows.npy b/examples/har_trees/pamap2_windows.npy
new file mode 100644
index 0000000..79eab7a
Binary files /dev/null and b/examples/har_trees/pamap2_windows.npy differ
diff --git a/examples/har_trees/requirements.txt b/examples/har_trees/requirements.txt
new file mode 100644
index 0000000..21a4f51
--- /dev/null
+++ b/examples/har_trees/requirements.txt
@@ -0,0 +1,7 @@
+pandas
+pyarrow
+git+https://github.com/jonnor/leaf-clustering
+emlearn
+scikit-learn
+setuptools
+structlog
diff --git a/examples/har_trees/timebased.py b/examples/har_trees/timebased.py
new file mode 100644
index 0000000..46022ef
--- /dev/null
+++ b/examples/har_trees/timebased.py
@@ -0,0 +1,323 @@
+
+
+# Based on https://github.com/atiselsts/feature-group-selection/blob/10853f32baf731d9dc3654043dc457568e7ecead/datasets/extract-features.py
+#
+# Modified to separate feature calculation from IO -- Jon Nordby, 2024
+# Modified to run on MicroPython -- Jon Nordby, 2024
+#
+# Description: extracts features (and transforms) from raw acceleration data.
+# The input data must already be segmented in windows. For each window, all features are computed.
+# Author: Atis Elsts, 2019
+
+
+import os
+import sys
+import math
+import array
+import time
+
+#########################################
+
+DATA_TYPECODE = 'h' # int16
+
+L1_NORM = 1
+L2_NORM = 2
+L2_NORM_SQUARED = 3
+
+#########################################
+
+FEATURE_MEAN = 0
+FEATURE_MEDIAN = 1
+FEATURE_Q25 = 2
+FEATURE_Q75 = 3
+FEATURE_IQR = 4
+FEATURE_MIN = 5
+FEATURE_MAX = 6
+FEATURE_STD = 7
+FEATURE_ENERGY = 8
+FEATURE_ENTROPY = 9
+ORDERED_FEATURES_N = 10
+
+N_FEATURES = 92
+
+@micropython.native
+def l2_sum(arr):
+    acc = 0.0
+    for x in arr:
+        acc += (x*x)
+    return acc
+
+@micropython.native
+def ordered_features(matrix : array.array, results : array.array):
+    assert len(results) == ORDERED_FEATURES_N
+
+    WINDOW_SIZE = len(matrix)
+    v = matrix
+    MEDIAN = WINDOW_SIZE // 2
+    Q1 = WINDOW_SIZE // 4
+    Q3 = 3 * WINDOW_SIZE // 4
+
+    l = sorted(v)
+    sm = sum(l)
+    sqs = l2_sum(l)
+    avg = sm / len(l)
+    median = l[MEDIAN]
+    mn = l[0]
+    mx = l[-1]
+    energy = ((sqs / len(l)) ** 0.5) # rms
+    std = ((sqs - avg * avg) ** 0.5)
+
+    # FIXME: implement FEATURE_MAD
+    #mad_list = [abs(x - l[MEDIAN]) for x in l]
+    #mad_list.sort()
+    #mad.append(mad_list[MEDIAN])
+
+    # FIXME: implement entropy
+    #bins, bin_edges = np.histogram(l, bins=10, density=True)
+    #print(scipy.stats.entropy(bins), bins)
+    # scipy.stats.entropy(bins)
+    entropy = 0 # FIXME: not implemented
+
+    results[FEATURE_MEAN] = avg
+    results[FEATURE_MEDIAN] = median
+    results[FEATURE_Q25] = l[Q1]
+    results[FEATURE_Q75] = l[Q3]
+    results[FEATURE_IQR] = (l[Q3] - l[Q1])
+    results[FEATURE_MIN] = mn
+    results[FEATURE_MAX] = mx
+    results[FEATURE_STD] = std
+    results[FEATURE_ENERGY] = energy
+    results[FEATURE_ENTROPY] = entropy
+
+
+#########################################
+
+# NOTE: uses np.corrcoef (numpy/ulab), which is not imported in this module.
+# Currently unused on-device; kept for reference.
+def corr(a, b, out):
+
+    for i, (v1, v2) in enumerate(zip(a, b)):
+        cc = np.corrcoef(v1, v2)
+        r = cc[0][1]
+        if math.isnan(r):
+            r = 1.0 # std == 0; assume perfect correlation (wise?)
+        out[i] = r
+
+#########################################
+
+@micropython.native
+def jerk_filter(inp, out):
+    # First-order difference. Pad the end with 0, so that len(out) == len(inp)
+    for i in range(len(inp) - 1):
+        out[i] = (inp[i + 1] - inp[i])
+    out[-1] = 0
+
+#########################################
+
+@micropython.native
+def norm_filter_l1(x, y, z, out):
+    for i in range(len(x)):
+        out[i] = abs(x[i]) + abs(y[i]) + abs(z[i])
+
+@micropython.native
+def norm_filter_l2(x, y, z, out):
+    for i in range(len(x)):
+        out[i] = (x[i]*x[i] + y[i]*y[i] + z[i]*z[i])**0.5
+
+@micropython.native
+def norm_filter_l2_squared(x, y, z, out):
+    for i in range(len(x)):
+        out[i] = (x[i]*x[i] + y[i]*y[i] + z[i]*z[i])
+
+##########################################
+
+@micropython.native
+def median(a, b, c):
+    if a > b:
+        if b > c:
+            return b # a, b, c
+        if a > c:
+            return c # a, c, b
+        return a # c, a, b
+    else: # a <= b
+        if a > c:
+            return a # b, a, c
+        if b > c:
+            return c # b, c, a
+        return b # c, b, a
+
+@micropython.native
+def median_filter(inp, out):
+    out[0] = inp[0]
+    for i in range(1, len(inp) - 1):
+        v = median(inp[i-1], inp[i], inp[i+1])
+        out[i] = v
+    out[-1] = inp[-1]
+
+
+def calculate_features_xyz(xyz):
+
+    xo, yo, zo = xyz
+    window_length = len(xo)
+
+    # pre-allocate arrays
+    alloc_start = time.ticks_ms()
+
+    xm = array.array(DATA_TYPECODE, range(window_length))
+    ym = array.array(DATA_TYPECODE, range(window_length))
+    zm = array.array(DATA_TYPECODE, range(window_length))
+
+    l2_norm_sq = array.array(DATA_TYPECODE, range(window_length))
+    l1_norm = array.array(DATA_TYPECODE, range(window_length))
+    l1_norm_jerk = array.array(DATA_TYPECODE, range(window_length))
+    l2_norm_sq_jerk = array.array(DATA_TYPECODE, range(window_length))
+
+    # Compute filtered versions
+    filter_start = time.ticks_ms()
+
+    median_filter(xo, xm)
+    median_filter(yo, ym)
+    median_filter(zo, zm)
+
+    # Doing a derivative is going to reduce the effective data frequency by a factor of 2.
+    # This assumes that the data is already low-pass filtered (50 Hz data filtered down to 20 Hz in the dataset),
+    # therefore high-frequency components are negligible
+    jerk_filter(xm, xo)
+    jerk_filter(ym, yo)
+    jerk_filter(zm, zo)
+
+    x = xm
+    y = ym
+    z = zm
+    x_jerk = xo
+    y_jerk = yo
+    z_jerk = zo
+
+    # compute norms
+    # do the squared L2 norm for now instead of the normal L2 norm
+    norm_filter_l1(x, y, z, l1_norm)
+    norm_filter_l2_squared(x, y, z, l2_norm_sq)
+    jerk_filter(l1_norm, l1_norm_jerk)
+    jerk_filter(l2_norm_sq, l2_norm_sq_jerk)
+
+    filter_end = time.ticks_ms()
+
+    norm_options = [None, L1_NORM, L2_NORM_SQUARED]
+    jerk_options = [False, True]
+
+    all_results = []
+
+    # reusable scratch buffer, to reduce allocations
+    features_array = array.array('f', (0 for _ in range(ORDERED_FEATURES_N)))
+
+    for do_jerk in jerk_options:
+        if do_jerk:
+            tx = x_jerk
+            ty = y_jerk
+            tz = z_jerk
+            tl1 = l1_norm_jerk
+            tl2 = l2_norm_sq_jerk
+            jerk_name = "Jerk"
+        else:
+            tx = x
+            ty = y
+            tz = z
+            tl1 = l1_norm
+            tl2 = l2_norm_sq
+            jerk_name = ""
+
+        results = calculate_features_of_transform(tx, ty, tz, features_array)
+        all_results += results
+
+        # jerk_name, "L1Norm"
+        results = calculate_features_of_norm_transform(tl1, features_array)
+        all_results += results
+
+        # jerk_name, "MagSq"
+        results = calculate_features_of_norm_transform(tl2, features_array)
+        all_results += results
+
+    features_end = time.ticks_ms()
+    alloc_duration = time.ticks_diff(filter_start, alloc_start)
+    filter_duration = time.ticks_diff(filter_end, filter_start)
+    feature_duration = time.ticks_diff(features_end, filter_end)
+
+    #print('feature-calc-details', alloc_duration, filter_duration, feature_duration)
+    assert len(all_results) == N_FEATURES, len(all_results)
+
+    return all_results
+
+# Q25/Q75 dropped
+norm_features = [
+    FEATURE_MEAN,
+    FEATURE_MIN,
+    FEATURE_MAX,
+    FEATURE_MEDIAN,
+    FEATURE_IQR,
+    FEATURE_ENERGY,
+    FEATURE_STD,
+    FEATURE_ENTROPY,
+]
+#norm_features = []
+
+def calculate_features_of_norm_transform(m, features_array):
+    ordered_features(m, features_array)
+
+    results_list = []
+    for n in norm_features:
+        results_list.append(features_array[n])
+
+    assert len(results_list) == len(norm_features)
+
+    return results_list
+
+
+transform_features = [
+    FEATURE_MEAN,
+    FEATURE_MAX,
+    FEATURE_MIN,
+    FEATURE_MEDIAN,
+    FEATURE_Q25,
+    FEATURE_Q75,
+    FEATURE_IQR,
+    FEATURE_ENERGY,
+    FEATURE_STD,
+    FEATURE_ENTROPY,
+    # TODO: include correlation
+    # TODO: include 'all'? type features
+    # TODO: include "sma" feature ?
+]
+#transform_features = []
+
+def calculate_features_of_transform(x, y, z, features_array : array.array):
+    results_list = []
+
+    # X
+    ordered_features(x, features_array)
+    for n in transform_features:
+        results_list.append(features_array[n])
+
+    # Y
+    ordered_features(y, features_array)
+    for n in transform_features:
+        results_list.append(features_array[n])
+
+    # Z
+    ordered_features(z, features_array)
+    for n in transform_features:
+        results_list.append(features_array[n])
+
+    # FIXME: this should be a SUM probably, not a concatenation
+    #combined = x + y + z
+    #ordered_features(combined, results)
+
+    # FIXME: raises exception
+    #corr(results, x, y, "XY")
+    #corr(results, x, z, "XZ")
+    #corr(results, y, z, "YZ")
+
+    assert len(results_list) == len(transform_features)*3
+    return results_list
+
+
diff --git a/examples/har_trees/windower.py b/examples/har_trees/windower.py
new file mode 100644
index 0000000..a6773b4
--- /dev/null
+++ b/examples/har_trees/windower.py
@@ -0,0 +1,49 @@
+
+import array
+
+def empty_array(typecode, length, value=0):
+    return array.array(typecode, (value for _ in range(length)))
+
+def shift_array(arr, shift):
+    for i in range(shift, len(arr)):
+        arr[i-shift] = arr[i]
+
+
+class TriaxialWindower():
+    def __init__(self, length):
+        self.length = length
+        self.x_values = empty_array('h', length)
+        self.y_values = empty_array('h', length)
+        self.z_values = empty_array('h', length)
+
+        self._valid = 0
+
+    def full(self):
+        return self._valid == self.length
+
+    def push(self, xs, ys, zs):
+        hop = len(xs)
+        assert len(ys) == hop
+        assert len(zs) == hop
+
+        remaining = self.length - self._valid
+        if remaining <= hop:
+            # make space for new data
+            shift = hop
+            shift_array(self.x_values, shift)
+            shift_array(self.y_values, shift)
+            shift_array(self.z_values, shift)
+            insert = self.length - shift
+        else:
+            insert = self._valid
+
+        self._valid = insert + hop
+        if self._valid > self.length:
+            raise ValueError("Window length must be a multiple of hop")
+
+        self.x_values[insert:insert+hop] = xs
+        self.y_values[insert:insert+hop] = ys
+        self.z_values[insert:insert+hop] = zs
+
diff --git a/examples/mnist_cnn/README.md b/examples/mnist_cnn/README.md
new file mode 100644
index 0000000..e1f15c4
--- /dev/null
+++ b/examples/mnist_cnn/README.md
@@ -0,0 +1,74 @@
+
+# Image classification using Convolutional Neural Network
+
+
+## Install requirements
+
+Make sure to have Python **3.10** installed.
+At the time of writing, TinyMaix (used for the CNN) only supports TensorFlow <2.14,
+which does not support Python 3.12 or later.
+
+Make sure to have the Unix port of MicroPython 1.23 setup.
+On Windows you can use Windows Subsystem for Linux (WSL), or Docker.
+
+Install the dependencies of this example:
+```console
+python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Run training
+
+This will train a Convolutional Neural Network using Keras.
+The process will output a `.tmdl` file, which is the built model.
+
+```console
+python mnist_train.py
+```
+
+## Running on host
+
+```console
+micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_cnn.mpy
+
+micropython mnist_cnn_run.py
+```
+
+## Running on device (ViperIDE)
+
+The fastest and easiest way to install on your device is to use ViperIDE.
+This will install the library and the example code:
+[Run using ViperIDE](https://viper-ide.org/?install=github:emlearn/emlearn-micropython/examples/mnist_cnn)
+
+
+## Running on device (manually)
+
+NOTE: Make sure you have it running successfully on host first.
+
+Flash your device with a standard MicroPython firmware,
+from the MicroPython.org downloads page.
+
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_cnn.mpy
+```
+
+```console
+mpremote cp mnist_cnn.tmdl :
+mpremote cp -r data/ :
+mpremote run mnist_cnn_run.py
+```
+
+## Running with live camera input
+
+This example requires hardware with an ESP32 chip, and an OV2640 camera module.
+It has been tested on LilyGo T-Camera Mic v1.6.
+
+You need to build a custom ESP32 firmware,
+and add this C module for camera access:
+https://github.com/cnadler86/micropython-camera-API
+
+```console
+mpremote run mnist_cnn_camera.py
+```
+
diff --git a/examples/mnist_cnn/mnist_cnn_run.py b/examples/mnist_cnn/mnist_cnn_run.py
index 982fed3..b50d44e 100644
--- a/examples/mnist_cnn/mnist_cnn_run.py
+++ b/examples/mnist_cnn/mnist_cnn_run.py
@@ -1,6 +1,6 @@
 
 import array
-import tinymaix_cnn
+import emlearn_cnn
 import time
 import gc
 
@@ -26,7 +26,7 @@ def test_cnn_mnist():
     model = None
     with open(MODEL, 'rb') as f:
         model_data = array.array('B', f.read())
-        model = tinymaix_cnn.new(model_data)
+        model = emlearn_cnn.new(model_data)
 
     # run on some test data
     for class_no in range(0, 10):
diff --git a/examples/mnist_cnn/mnist_train.py b/examples/mnist_cnn/mnist_train.py
index ff00b93..7c7a6e7 100644
--- a/examples/mnist_cnn/mnist_train.py
+++ b/examples/mnist_cnn/mnist_train.py
@@ -54,12 +54,15 @@ def train_mnist(h5_file, epochs=10):
     model.summary()
 
     model.compile(optimizer='adam', loss = "categorical_crossentropy", metrics = ["categorical_accuracy"])
-    H = model.fit(x_train, y_train, batch_size=128, epochs=EPOCHS, verbose=1, validation_data = (x_test, y_test), shuffle=True)
+    H = model.fit(x_train, y_train, batch_size=128, epochs=epochs, verbose=1, validation_data = (x_test, y_test), shuffle=True)
 
     model.save(h5_file)
 
 
 def generate_test_files(out_dir, x, y):
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+
     expect_bytes = 28*28*1
     classes = numpy.unique(y)
     X_series = pandas.Series([s for s in x])
@@ -146,29 +149,34 @@ def format_shape(t : tuple[int]):
     assert os.path.exists(tmld_file), tmld_file
     assert os.path.exists(header_file), header_file
 
+    return tmld_file
 
 def main():
     h5_file = "mnist_cnn.h5"
-    tinymaix_tools_dir = './TinyMaix/tools'
+    tinymaix_tools_dir = '../../dependencies/TinyMaix/tools'
+    assert os.path.exists(tinymaix_tools_dir), tinymaix_tools_dir
+
+    quantize_data = None # None disables quantization
+    quantize_data = os.path.join(tinymaix_tools_dir, 'quant_img_mnist/')
+    if quantize_data is not None:
+        assert os.path.exists(quantize_data)
+    precision = 'int8' if quantize_data else 'fp32'
+
     # Run training
     train_mnist(h5_file)
 
     #data = x_test[1]
 
-    quantize_data = True # disables quantization
-    quantize_data = './TinyMaix/tools/quant_img_mnist/'
-    precision = 'int8' if quantize_data else 'fp32'
-
     # Export the model using TinyMaix
-
-    generate_tinymaix_model(h5_file,
+    out = generate_tinymaix_model(h5_file,
         input_shape=(28,28,1),
         output_shape=(1,),
         tools_dir=tinymaix_tools_dir,
         precision=precision,
         quantize_data=quantize_data,
     )
+    print('Wrote model to', out)
 
 
 if __name__ == '__main__':
     main()
diff --git a/examples/mnist_cnn/package.json b/examples/mnist_cnn/package.json
new file mode 100644
index 0000000..54e7dba
--- /dev/null
+++ b/examples/mnist_cnn/package.json
@@ -0,0 +1,24 @@
+{
+  "name": "emlearn-micropython MNIST CNN image classification example",
+  "version": "0.3.0",
+  "description": "Classify handwritten digits using Convolutional Neural Network",
+  "author": "Jon Nordby",
+  "license": "MIT",
+  "readme": "github:emlearn/emlearn-micropython/blob/master/examples/mnist_cnn/README.md",
+  "keywords": "machinelearning,cnn,convolutionalneuralnetwork,classification,mnist,digits",
+  "urls": [
+    ["fs:mnist_cnn.tmdl", "mnist_cnn.tmdl"],
+    ["fs:data/mnist_example_0.bin", "data/mnist_example_0.bin"],
+    ["fs:data/mnist_example_1.bin", "data/mnist_example_1.bin"],
+    ["fs:data/mnist_example_2.bin", "data/mnist_example_2.bin"],
+    ["fs:data/mnist_example_3.bin", "data/mnist_example_3.bin"],
+    ["fs:data/mnist_example_4.bin", "data/mnist_example_4.bin"],
+    ["fs:data/mnist_example_5.bin", "data/mnist_example_5.bin"],
+    ["fs:data/mnist_example_6.bin", "data/mnist_example_6.bin"],
+    ["fs:data/mnist_example_7.bin", "data/mnist_example_7.bin"],
+    ["fs:data/mnist_example_8.bin", "data/mnist_example_8.bin"],
+    ["fs:data/mnist_example_9.bin", "data/mnist_example_9.bin"],
+    ["fs:mnist_cnn_run.py", "mnist_cnn_run.py"]
+  ],
+  "deps": [["emlearn", "master"]]
+}
diff --git a/examples/mnist_cnn/requirements.txt b/examples/mnist_cnn/requirements.txt
new file mode 100644
index 0000000..0d879eb
--- /dev/null
+++ b/examples/mnist_cnn/requirements.txt
@@ -0,0 +1,4 @@
+tensorflow-cpu<2.14.1
+numpy<2.0.0
+pandas>=2.0.0
+pillow>=10.0.0
diff --git a/examples/sequence/sequence_lock.py b/examples/sequence/sequence_lock.py
index 719ed33..5409e47 100644
--- a/examples/sequence/sequence_lock.py
+++ b/examples/sequence/sequence_lock.py
@@ -1,7 +1,7 @@
 
 import os
 import array
 
-import emlneighbors
+import emlearn_neighbors
 
 TRAINING_STATE = 'training'
 UNLOCKED_STATE = 'unlocked'
@@ -193,9 +193,9 @@ def _reset_model(self):
         items = self.training_examples
         features = self.sequence_length-1
 
-        self.model = emlneighbors.new(items, features, items)
+        self.model = emlearn_neighbors.new(items, features, items)
 
-        # XXX: could be dropped if emlneighbors allowed accessing n_items
+        # XXX: could be dropped if emlearn_neighbors allowed accessing n_items
         self.training_items = 0
 
     def _get_distances(self, features):
diff --git a/examples/soundlevel_iir/.gitignore b/examples/soundlevel_iir/.gitignore
new file mode 100644
index 0000000..ef418f5
--- /dev/null
+++ b/examples/soundlevel_iir/.gitignore
@@ -0,0 +1 @@
+secrets.py
diff --git a/examples/soundlevel_iir/README.md b/examples/soundlevel_iir/README.md
new file mode 100644
index 0000000..f1ebabb
--- /dev/null
+++ b/examples/soundlevel_iir/README.md
@@ -0,0 +1,263 @@
+
+# Sound level meter
+
+This is a sound level meter implemented in MicroPython with emlearn-micropython.
+It can be used for basic sound measurements or noise monitoring over time.
+It can also be used as a starting point for other applications that need to process sound.
+
+The core code can be run on any MicroPython port (including PC/Unix),
+but to run the on-device examples you will need particular hardware (see hardware requirements section).
+
+![Sound level meter with Blynk dashboard](./soundlevel-blynk-combined.png)
+
+## Features
+
+The [soundlevels.py](soundlevels.py) module implements the standard processing typically used in a
+sound level meter used for noise measurements:
+
+* Computes soundlevel in decibels SPL, using a digital microphone
+* Supports *A* frequency weighting, or no weighting (*"Z"*)
+* Supports *Fast* (125 ms), *Slow* (1 second) or *no* time integration
+* Supports showing the instantaneous sound level (short Leq)
+* Supports summarized measurements: equivalent sound pressure level (Leq), minimum (Lmin), maximum (Lmax)
+* Supports percentile-based summarizations: L10, L50, L80 etc (configurable)
+* Supports 16 kHz sample rate
+
+The following emlearn functionality is used in the implementation:
+
+- `emlearn_iir`. IIR filters to implement the A weighting filter
+- `emlearn_arrayutils.linear_map()`. To convert array values between `float` and `int16`
+
+
+#### Notes on measurement correctness
+
+NOTE: In order to have reasonable output values,
+the *microphone sensitivity* **must be correctly specified**.
+Ideally you also check/calibrate the output against a known-good sound level meter.
+
+NOTE: There is no compensation for non-linear frequency responses in the microphone.
+
+NOTE: Processing is done largely in int16, which limits the dynamic range.
+
+So the practical accuracy of your measurements will vary based on your hardware (microphone) and these limitations.
+Generally this kind of device will be most useful to track relative changes,
+and less useful for measuring against absolute threshold limits (as specified in various noise regulations).
+For known performance, get a sound level meter with a class rating (2/1), preferably with certification.
+
+## Hardware requirements
+
+The device must have an **I2S** digital microphone.
+*PDM* microphones will not work, as they are not yet supported in MicroPython (as of late 2024).
+
+The microcontroller used must support the `machine.I2S` MicroPython module.
+An ESP32-based device is recommended, but RP2, stm32 and mimxrt also support I2S.
+More information about I2S in MicroPython at [miketeachman/micropython-i2s-examples](https://github.com/miketeachman/micropython-i2s-examples).
+
+Tested devices:
+
+- LilyGo T-Camera Mic v1.6
+
+Untested devices, theoretically compatible by changing the pinout:
+
+- [LilyGo T-Camera S3](https://www.lilygo.cc/products/t-camera-s3)
+- Generic ESP32 with external I2S microphone breakout
+
+Please report back if you get this running on a device not listed here.
+
+## Install requirements
+
+Make sure to have Python 3.10+ installed.
+
+Make sure to have the Unix port of MicroPython 1.23 setup.
+On Windows you can use Windows Subsystem for Linux (WSL), or Docker.
+
+Install the dependencies of this example:
+```console
+python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Example: Compute soundlevel for an audio file (on host/PC)
+
+Install the emlearn modules
+```console
+micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_arrayutils.mpy
+micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_iir.mpy
+```
+
+Compute soundlevels for a file
+```console
+micropython soundlevel_file.py test_burst.wav
+```
+
+The output is newline-delimited JSON ([NDJSON](https://github.com/ndjson/ndjson-spec)).
+If you want some other format, like CSV, modify the example code, or post-process the NDJSON as sketched below.
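+
+A minimal sketch of such post-processing on the host (assuming standard Python with the
+`json` and `csv` modules; the field names below are illustrative, matching the keys emitted by `soundlevel_file.py`):
+
+```python
+import sys, json, csv
+
+# Read NDJSON from stdin, write CSV to stdout.
+# Fields missing on a given line are left empty; extra fields are ignored.
+fields = ['time', 'short_leq', 'Leq', 'Lmin', 'Lmax']
+writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
+writer.writeheader()
+for line in sys.stdin:
+    writer.writerow(json.loads(line))
+```
+
+Usage would be along the lines of `micropython soundlevel_file.py test_burst.wav | python ndjson_to_csv.py > out.csv`.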
+
+## Running on device (ViperIDE)
+
+The fastest and easiest way to install on your device is to use ViperIDE.
+This will install the library and the example code automatically.
+[Run using ViperIDE](https://viper-ide.org/?install=github:emlearn/emlearn-micropython/examples/soundlevel_iir)
+
+In ViperIDE, you can select which example file to run (described below),
+and hit the Play button to run it.
+
+## Example: Compute soundlevels on device
+
+Flash your device with a standard MicroPython firmware, from the [MicroPython.org downloads](https://micropython.org/download/).
+
+Ensure you have an I2S microphone, and that the pinout is correct in `soundlevel_live.py`.
+
+Install the emlearn modules *for your architecture*. ESP32=xtensawin
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_arrayutils.mpy
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_iir.mpy
+```
+
+Copy the example code to the device
+```console
+mpremote cp soundlevel.py :
+```
+
+Run
+```console
+mpremote run soundlevel_live.py
+```
+
+It should show the current soundlevel, which reacts instantly to sound changes.
+Every 10 seconds the summarized soundlevels should be logged.
+
+
+## Example: Log soundlevels to cloud over WiFi
+
+This requires a device which supports the `network.WLAN` MicroPython module.
+Typically an ESP32 or RP2.
+
+This example uses the [Blynk](https://blynk.io/) IoT platform.
+So you will need to register for an account there (the free plan is sufficient).
+
+After you have registered, you will need to 1. Create a Template, 2. Create datastreams, 3. Create a dashboard, and 4. Register a Device.
+
+#### Create Blynk Template
+
+The Template structure in Blynk has all the setup related to one type of device,
+including the datastreams, dashboard, and devices.
+
+Go to **Developer Zone -> My Templates**, and hit **New Template**.
+
+- Name: Soundlevel Meter
+- Hardware: ESP32
+- Connection Type: WiFi
+
+For reference, see [Blynk.io: Template Quick Setup](https://docs.blynk.io/en/getting-started/template-quick-setup).
+
+#### Setup Blynk Datastreams
+
+Create 3 *Virtual Pin* Datastreams for the Template.
+The name/pin pairs should be: Leq/V2, Lmin/V3, Lmax/V4.
+Each should have the following configuration:
+
+- Data Type: Double
+- Min: 20
+- Max: 120
+- Enable History Data: Yes
+
+Remember to hit **Save and Apply** after adding the datastreams.
+
+For reference, see [Blynk.io: Set Up Datastreams](https://docs.blynk.io/en/getting-started/template-quick-setup/set-up-datastreams).
+
+#### Create Blynk Web Dashboard
+
+- Go to the *Web Dashboard* tab, click *Edit*
+- Add a *Chart* widget
+- Edit the Chart widget, and add the 3 datastreams: Leq, Lmin, Lmax
+- Save the dashboard
+
+For reference, see [Blynk.io: Set Up Web Dashboard](https://docs.blynk.io/en/getting-started/template-quick-setup/set-up-web-dashboard).
+
+
+#### Register a device in Blynk
+
+- In the right hand menu of the Blynk dashboard, select `Devices -> Create New Device`.
+- Make sure that the selected **Template** is "Soundlevel Meter".
+- Hit **Create** to register the device
+- Copy the **Auth Token**, you will need it in the next step.
+
+For reference, see [Blynk.io: Manual Device Activation, Getting Auth Token](https://docs.blynk.io/en/getting-started/activating-devices/manual-device-activation).
+
+#### Setup the device
+
+Put your *WiFi network name (SSID)*, *WiFi password* and *Blynk auth token* into a file called `secrets.py`.
+Use the following variable names.
+
+```python
+BLYNK_AUTH_TOKEN = 'MY TOKEN HERE'
+WIFI_SSID = 'MY WIFI SSID'
+WIFI_PASSWORD = 'MY WIFI PASSWORD'
+```
+
+Install the emlearn modules *for your architecture*. ESP32=xtensawin
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_arrayutils.mpy
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_iir.mpy
+```
+
+Copy the example code to the device
+```console
+mpremote cp secrets.py iot_blynk.py soundlevel.py :
+```
+
+Run the example
+```console
+mpremote run soundlevel_iot.py
+```
+
+You should see some log output, and within 30 seconds you should see the first attempts to send data to Blynk.
+If the status code is 200, then you should see data appear in your dashboard.
+
+![Graph over soundlevel in Blynk](./soundlevel-blynk-graph.png)
+
+
+## Example: Show soundlevels on screen
+
+This example requires hardware with an **SSD1306 display**,
+in addition to an I2S microphone.
+It uses [micropython-nano-gui](https://github.com/peterhinch/micropython-nano-gui) for the screen.
+So the example can be adapted to other displays supported by that framework.
+
+Install the emlearn modules *for your architecture*. ESP32=xtensawin
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_arrayutils.mpy
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_iir.mpy
+```
+
+Install UI framework and screen driver
+```console
+mpremote mip install "github:peterhinch/micropython-nano-gui/drivers/ssd1306"
+mpremote mip install "github:peterhinch/micropython-nano-gui"
+```
+
+Copy the example code to the device
+```console
+mpremote cp color_setup.py soundlevel.py :
+```
+
+Run the example
+```console
+mpremote run soundlevel_screen.py
+```
+
+The screen should update continuously to show the current sound level.
+
+![Soundlevel being shown on display](./soundlevel-screen.png)
+
+## Development
+
+### Running automated tests
+
+Run automated tests
+```console
+python test_soundlevel.py
+```
+
diff --git a/examples/soundlevel_iir/color_setup.py b/examples/soundlevel_iir/color_setup.py
new file mode 100644
index 0000000..1be1423
--- /dev/null
+++ b/examples/soundlevel_iir/color_setup.py
@@ -0,0 +1,13 @@
+
+import gc
+from machine import I2C, Pin
+
+from drivers.ssd1306.ssd1306 import SSD1306_I2C as SSD
+
+# LilyGo T-Camera Mic
+i2c = I2C(scl=Pin(22), sda=Pin(21))
+
+oled_width = 128
+oled_height = 64
+gc.collect()  # Precaution before instantiating framebuf
+ssd = SSD(oled_width, oled_height, i2c)
diff --git a/examples/soundlevel_iir/iot_blynk.py b/examples/soundlevel_iir/iot_blynk.py
new file mode 100644
index 0000000..90937ba
--- /dev/null
+++ b/examples/soundlevel_iir/iot_blynk.py
@@ -0,0 +1,76 @@
+
+
+import time
+import requests
+
+class BlynkClient():
+    """
+    Ref:
+    https://docs.blynk.io/en/blynk.cloud/device-https-api/upload-set-of-data-with-timestamps-api
+
+    """
+
+    def __init__(self,
+            token,
+            hostname='blynk.cloud',
+            protocol='https',
+            ):
+
+        self._batch_url = protocol + '://' + hostname + '/external/api/batch/update?token=' + token
+
+    def post_telemetry(self, values : list[dict[str, float]]):
+        """
+        Send multiple telemetry values.
+        The 'time' key should be in Unix milliseconds.
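+
+        Example (hypothetical datastream pins; values are illustrative):
+            client.post_telemetry([
+                {'time': 1700000000000, 'v2': 62.1},  # timestamped, grouped and sent per-stream
+                {'v3': 55.0, 'v4': 71.3},  # no timestamp, sent in one batch request
+            ])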
+ """ + stream_values = {} + notime_values = {} + + # Blynk HTTP API currently cannot send multiple timestamped values on multiple streams + # so where time is specified - we shuffle the data into being organized per-stream + # data which is not timestamped is handled sent in a single request + for datapoint in values: + if 'time' in datapoint.keys(): + t = datapoint['time'] + for key, value in datapoint.items(): + if key == 'time': + continue + if key not in stream_values.keys(): + stream_values[key] = [] + stream_values[key].append((t, value)) + else: + notime_values.update(datapoint) + + if notime_values: + self.post_multiple(notime_values) + + for key, datapoints in stream_values.items(): + self.post_timestamped(key, datapoints) + + def post_multiple(self, values : dict[str, str]): + """ + Post multiple values at current time + Each entry in values must have key being a pin name, and the associated value + """ + args = '&'.join([ f'{k}={v}' for k, v in values.items() ]) + url = self._batch_url + '&' + args + print('post-multiple', url) + r = requests.get(url, timeout=5.0) + print('post-multiple-done', r.status_code) + print('post-multiple-back', r.content) + #assert r.status_code == 200, (r.status_code, r.content) + + def post_timestamped(self, pin : str, values : list[tuple[int, float]]): + """ + Post multiple values from different times, for 1 stream + Each entry in values must be a tuple with (timestamp, value) + """ + + payload = values + url = self._batch_url+f'&pin={pin}' + r = requests.post(url, json=payload) + assert r.status_code == 200, (r.status_code, r.content) + + + + diff --git a/examples/soundlevel_iir/iot_thingsboard.py b/examples/soundlevel_iir/iot_thingsboard.py new file mode 100644 index 0000000..ad6a0b1 --- /dev/null +++ b/examples/soundlevel_iir/iot_thingsboard.py @@ -0,0 +1,71 @@ + +import time +import requests + +class ThingsBoard(): + """ + Ref: + https://thingsboard.io/docs/reference/http-api/ + """ + + def __init__(self, + token, + hostname='thingsboard.cloud', + protocol='https', + ): + + + self._telemetry_url = protocol + '://' + hostname + '/api/v1/' + token + '/telemetry' + + def post_telemetry(self, values : list[dict]): + """ + Send multiple telemetry values. + The 'time' key should be in Unix milliseconds. 
+ """ + + def encode_one(d): + o = { 'values': {} } + if 'time' in d: + o['ts'] = d['time'] + for k, v in d.items(): + if k == 'time': + continue + o['values'][k] = v + return o + + payload = [ encode_one(v) for v in values ] + + + r = requests.post(self._telemetry_url, json=payload) + assert r.status_code == 200, (r.status_code, r.content) + +def unix_time_seconds(): + timestamp = time.time() + epoch_year = time.gmtime(0)[0] + + if epoch_year == 2020: + # seconds between 2000 (MicroPython epoch) and 1970 (Unix epoch) + epoch_difference = 946684800 + timestamp = timestamp + epoch_difference + elif epoch_year == 1970: + pass + else: + raise ValueError('Unknown epoch year') + + return float(timestamp) + +# bc3ab311-4e92-11ef-b45a-8f71ad378839 +ACCESS_TOKEN = '7AiV0dXRPWKrxrLcI4wO' +api = ThingsBoard(token=ACCESS_TOKEN) + +t = int(unix_time_seconds() * 1000) + +values = [] +for s in range(0, 60, 10): + v = {'time': t-(s*1000), 'db2': 78.0+s, 'hex': 'ABCEDFE123122312452231DFABCEDF'} + values.append(v) + +api.post_telemetry(values) +print(values) +print('Posted telemetry') + diff --git a/examples/soundlevel_iir/package.json b/examples/soundlevel_iir/package.json new file mode 100644 index 0000000..4cc4d39 --- /dev/null +++ b/examples/soundlevel_iir/package.json @@ -0,0 +1,18 @@ +{ + "name": "emlearn-micropython Soundlevel IIR filter example", + "version": "0.3.0", + "description": "Compute soundlevels, using IIR filters for A-weighting", + "author": "Jon Nordby", + "license": "MIT", + "readme": "github:emlearn/emlearn-micropython/blob/master/examples/soundlevel_iir/README.md", + "keywords": "digitalsignalprocessing,IIR,filters,infiniteimpulseresponsefilters,DSP", + "urls": [ + ["fs:soundlevel.py", "soundlevel.py"], + ["fs:iot_blynk.py", "iot_blynk.py"], + ["fs:soundlevel_live.py", "soundlevel_live.py"], + ["fs:soundlevel_iot.py", "soundlevel_iot.py"], + ["fs:soundlevel_screen.py", "soundlevel_screen.py"], + ["fs:soundlevel_file.py", "soundlevel_file.py"] + ], + "deps": [["emlearn", "master"]] +} diff --git a/examples/soundlevel_iir/soundlevel-blynk-combined.png b/examples/soundlevel_iir/soundlevel-blynk-combined.png new file mode 100644 index 0000000..d1e1242 Binary files /dev/null and b/examples/soundlevel_iir/soundlevel-blynk-combined.png differ diff --git a/examples/soundlevel_iir/soundlevel-blynk-graph.png b/examples/soundlevel_iir/soundlevel-blynk-graph.png new file mode 100644 index 0000000..a1ba35a Binary files /dev/null and b/examples/soundlevel_iir/soundlevel-blynk-graph.png differ diff --git a/examples/soundlevel_iir/soundlevel-screen.png b/examples/soundlevel_iir/soundlevel-screen.png new file mode 100644 index 0000000..5fc8c18 Binary files /dev/null and b/examples/soundlevel_iir/soundlevel-screen.png differ diff --git a/examples/soundlevel_iir/soundlevel.py b/examples/soundlevel_iir/soundlevel.py new file mode 100644 index 0000000..83b9122 --- /dev/null +++ b/examples/soundlevel_iir/soundlevel.py @@ -0,0 +1,260 @@ + +import math +import struct +import array +from collections import deque + +import emlearn_iir +from emlearn_arrayutils import linear_map +#import emlearn_iir_q15 +#from iir_python import IIRFilter + +class IIRFilterEmlearn: + + def __init__(self, coefficients): + c = array.array('f', coefficients) + self.iir = emlearn_iir.new(c) + def process(self, samples): + self.iir.run(samples) + +class IIRFilterEmlearnFixed: + + def __init__(self, coefficients): + c = emlearn_iir_q15.convert_coefficients(coefficients) + self.iir = emlearn_iir_q15.new(c) + def process(self, 
samples): + self.iir.run(samples) + + +# A method for computing A weighting filters etc for any sample-rate +# https://www.dsprelated.com/thread/10546/a-weighting-filter + +def assert_array_typecode(arr, typecode): + actual_typecode = str(arr)[7:8] + assert actual_typecode == typecode, (actual_typecode, typecode) + +a_filter_16k = [ + 1.0383002230320646, 0.0, 0.0, 1.0, -0.016647242439959593, 6.928267021369795e-05, + 1.0, -2.0, 1.0, 1.0, -1.7070508390293027, 0.7174637059318595, + 1.0, -2.0, 1.0, 1.0, -1.9838868447331497, 0.9839517531763131 +] + +@micropython.native +def rms_micropython_native(arr): + acc = 0.0 + for i in range(len(arr)): + v = arr[i] + p = (v * v) + acc += p + mean = acc / len(arr) + out = math.sqrt(mean) + return out + +# Using a limited-precision aware approach based on Cumulative Moving Average +# https://www.codeproject.com/Articles/807195/Precise-and-safe-calculation-method-for-the-averag +@micropython.viper +def rms_micropython_viper(arr) -> object: + buf = ptr16(arr) # XXX: input MUST BE h/uint16 array + l = int(len(arr)) + cumulated_average : int = 0 + cumulated_remainder : int = 0 + addendum : int = 0 + n_values : int = 0 + for i in range(l): + v = int(buf[i]) + value = (v * v) # square it + n_values += 1 + addendum = value - cumulated_average + cumulated_remainder + cumulated_average += addendum // n_values + cumulated_remainder = addendum % n_values + + # sqrt it + out = math.sqrt(cumulated_average) + return out + +@micropython.native +def time_integrate_native(arr, initial, time_constant, samplerate): + acc = initial + dt = 1.0/samplerate + a = dt / (time_constant + dt) + #print('a', a) + + for i in range(len(arr)): + v = arr[i] + p = (v * v) # power is amplitude squared + # exponential time weighting aka 1st order low-pass filter + #acc = (a*p) + ((1-a)*acc) + acc = acc + a*(p - acc) # exponential time weighting aka 1st order low-pass filter + #acc += p + + return acc + +# Use C module for data conversion +# @micropython.native with a for loop is too slow +def int16_to_float(inp, out): + return linear_map(inp, out, -2**15, 2**15, -1.0, 1.0) + +def float_to_int16(inp, out): + return linear_map(inp, out, -1.0, 1.0, -2**15, 2**15) + + +class Summarizer(): + """Compute common acoustical summarizations of soundlevels""" + + def __init__(self, maxlen): + self._capacity = maxlen + self._deque = deque([], maxlen, 1) + + def reset(self): + while len(self._deque): + self._deque.popleft() + + def full(self): + full = len(self._deque) == self._capacity + return full + + def push(self, value): + # if full, drop oldest value + if len(self._deque) >= self._capacity: + print('summarizer-queue-overflow') + self._deque.popleft() + + self._deque.append(value) + + def compute_leq(self): + # NOTE: assumes that the values in the deque are decibel values + avg = sum((pow(10, db/10.0) for db in self._deque)) / len(self._deque) + leq = 10*math.log10(avg) + return leq + + def compute_minmax(self): + mn = min(self._deque) + mx = max(self._deque) + return mn, mx + + def compute_percentiles(self, percentiles : list[float]) -> list[float]: + values = sorted(self._deque) + out = [] + for p in percentiles: + # find closest value. 
+            # XXX: no interpolation
+            idx = min(len(values) - 1, round((len(values) - 1) * (p/100.0)))
+            out.append(values[idx])
+
+        return out
+
+    def compute_all(self, levels=(10, 50, 90)) -> dict[str, float]:
+
+        leq = self.compute_leq()
+        lmin, lmax = self.compute_minmax()
+        metrics = {
+            'Lmin': lmin,
+            'Lmax': lmax,
+            'Leq': leq,
+        }
+        ln_values = self.compute_percentiles([100-ln for ln in levels])
+        for ln, value in zip(levels, ln_values):
+            metrics[f'L{ln}'] = value
+
+        return metrics
+
+
+class SoundlevelMeter():
+
+    def __init__(self, buffer_size,
+            samplerate,
+            mic_sensitivity,
+            time_integration=0.125,
+            frequency_weighting='A',
+            summary_interval=60.0,
+            summary_capacity=2,
+            ):
+
+        self._buffer_size = buffer_size
+        self._sensitivity_dbfs = mic_sensitivity
+
+        buffer_duration = buffer_size / samplerate
+        assert buffer_duration <= 0.125
+
+        self._power_integrated_fast = 0.0
+        self._samplerate = samplerate
+        self._time_integration = time_integration
+
+        # scratch buffer for int16 <-> float conversion, used by the frequency weighting filter
+        self.float_array = array.array('f', (0 for _ in range(buffer_size)))
+
+        if not frequency_weighting:
+            self.frequency_filter = None
+        elif frequency_weighting == 'A':
+            #self.frequency_filter = IIRFilter(a_filter_16k)
+            self.frequency_filter = IIRFilterEmlearn(a_filter_16k)
+            #self.frequency_filter = IIRFilterEmlearnFixed(a_filter_16k)
+        else:
+            raise ValueError('Unsupported frequency_weighting')
+
+        self._summary_interval = summary_interval
+        self._summary_capacity = summary_capacity
+        per_summary_interval = int((1/buffer_duration)*summary_interval)
+        self._summarizer = Summarizer(per_summary_interval)
+        self._summary_queue = deque([], self._summary_capacity)
+
+        self._last_value = None
+
+    def last_value(self):
+        return self._last_value
+
+    def compute_level(self, samples):
+        assert len(self.float_array) == self._buffer_size
+        assert len(samples) == self._buffer_size
+        assert_array_typecode(samples, 'h')
+
+        # Apply frequency weighting
+        if self.frequency_filter:
+            int16_to_float(samples, self.float_array)
+            self.frequency_filter.process(self.float_array)
+            float_to_int16(self.float_array, samples)
+
+        spl_max = 94 - self._sensitivity_dbfs
+        ref = 2**15
+
+        # no integration - "linear"
+        if self._time_integration is None:
+            rms = rms_micropython_native(samples)
+            # FIXME: causes math domain error
+            #rms = rms_micropython_viper(samples)
+        else:
+            p = time_integrate_native(samples,
+                self._power_integrated_fast,
+                self._time_integration,
+                self._samplerate,
+            )
+            self._power_integrated_fast = p
+            rms = math.sqrt(p)
+
+        level = 20*math.log10(rms/ref)
+        level += (spl_max)
+
+        self._last_value = level
+        return level
+
+    def process(self, samples):
+        # compute soundlevel for this instant
+        level = self.compute_level(samples)
+
+        if self._summary_interval > 0.0:
+            self._summarizer.push(level)
+
+            # update summarized metrics
+            if self._summarizer.full():
+                metrics = self._summarizer.compute_all()
+                if len(self._summary_queue) >= self._summary_capacity:
+                    self._summary_queue.popleft() # drop oldest
+                    print('summary-queue-overflow')
+                self._summary_queue.append(metrics)
+                self._summarizer.reset()
+
+        return level
+
+
diff --git a/examples/soundlevel_iir/soundlevel_file.py b/examples/soundlevel_iir/soundlevel_file.py
new file mode 100644
index 0000000..0ff2eeb
--- /dev/null
+++ b/examples/soundlevel_iir/soundlevel_file.py
@@ -0,0 +1,127 @@
+
+import sys
+import struct
+import array
+
+from soundlevel import SoundlevelMeter
+
+# TODO: create/use a standard wavefile module, installed via mip
+
+def read_wave_header(wav_file):
+    # based on https://github.com/miketeachman/micropython-i2s-examples/blob/master/examples/wavplayer.py
+    # Copyright (c) 2022 Mike Teachman
+
+    chunk_ID = wav_file.read(4)
+    if chunk_ID != b"RIFF":
+        raise ValueError("WAV chunk ID invalid")
+    chunk_size = wav_file.read(4)
+    format = wav_file.read(4)
+    if format != b"WAVE":
+        raise ValueError("WAV format invalid")
+    sub_chunk1_ID = wav_file.read(4)
+    if sub_chunk1_ID != b"fmt ":
+        raise ValueError("WAV sub chunk 1 ID invalid")
+    sub_chunk1_size = wav_file.read(4)
+    audio_format = struct.unpack("<H", wav_file.read(2))[0]
+    num_channels = struct.unpack("<H", wav_file.read(2))[0]
+    sample_rate = struct.unpack("<I", wav_file.read(4))[0]
+    byte_rate = struct.unpack("<I", wav_file.read(4))[0]
+    block_align = struct.unpack("<H", wav_file.read(2))[0]
+    bits_per_sample = struct.unpack("<H", wav_file.read(2))[0]
+
+    # usually the sub chunk2 ID ("data") comes next, but
+    # some online MP3->WAV converters add
+    # binary data before "data". So, read a fairly large
+    # block of bytes and search for "data".
+
+    binary_block = wav_file.read(200)
+    offset = binary_block.find(b"data")
+    if offset == -1:
+        raise ValueError("WAV sub chunk 2 ID not found")
+
+    first_sample_offset = 44 + offset
+
+    return num_channels, sample_rate, bits_per_sample, first_sample_offset
+
+
+def int16_from_bytes(buf, endianness='<'):
+
+    fmt = endianness + 'h'
+    gen = (struct.unpack(fmt, buf[i:i+2])[0] for i in range(0, len(buf), 2))
+    arr = array.array('h', gen)
+    assert len(arr) == len(buf)//2
+    return arr
+
+
+def read_wav(file, samplerate, frames):
+    """Generator yielding chunks of int16 samples from a mono 16-bit WAV file"""
+
+    file_channels, file_sr, file_bitdepth, data_offset = read_wave_header(file)
+    assert samplerate == file_sr, (samplerate, file_sr)
+    # only int16 mono is supported
+    assert file_channels == 1, file_channels
+    assert file_bitdepth == 16, file_bitdepth
+
+    file.seek(data_offset)
+
+    while True:
+        data = file.read(2*frames)
+        if not data:
+            return
+        samples = int16_from_bytes(data)
+        yield samples
+
+
+def process_file(audiofile, mic_sensitivity,
+        chunk_duration=0.125,
+        frequency_weighting='A',
+        time_integration=0.125,
+        summary_interval=10.0):
+    """Compute soundlevels for a .wav file"""
+
+    # NOTE: only 16 kHz sample rate supported
+    SR = 16000
+    chunk_size = int(SR * chunk_duration)
+    meter = SoundlevelMeter(chunk_size, SR, mic_sensitivity,
+        frequency_weighting=frequency_weighting,
+        time_integration=time_integration,
+        summary_interval=summary_interval,
+    )
+
+    t = 0.0 # seconds
+    for chunk in read_wav(audiofile, SR, frames=chunk_size):
+
+        short_leq = meter.process(chunk)
+        dt = len(chunk)/float(SR)
+        t += dt
+
+        out = dict(time=t, short_leq=short_leq)
+
+        if len(meter._summary_queue):
+            summaries = meter._summary_queue.pop()
+            out.update(summaries)
+
+        yield out
+
+
+def main():
+
+    import json
+    mic_sensitivity = 0.0
+
+    if len(sys.argv) != 2:
+        raise ValueError('Usage: micropython soundlevel_file.py AUDIO.wav')
+
+    audio_path = sys.argv[1]
+    with open(audio_path, 'rb') as audiofile:
+        measurements = process_file(audiofile, mic_sensitivity)
+        for m in measurements:
+            cleaned = { k: round(v, 4) for k, v in m.items() }
+            serialized = json.dumps(cleaned)
+            print(serialized)
+
+if __name__ == '__main__':
+    main()
+
diff --git a/examples/soundlevel_iir/soundlevel_iot.py b/examples/soundlevel_iir/soundlevel_iot.py
new file mode 100644
index 0000000..086ff3f
--- /dev/null
+++ b/examples/soundlevel_iir/soundlevel_iot.py
@@ -0,0 +1,124 @@
+
+"""
+IoT-enabled sound level meter example - sends data to an IoT platform
+"""
+
+import machine
+import network
+import time
+import array
+from machine import Pin, I2S
+
+from soundlevel import SoundlevelMeter
+import secrets
+
+# Microphone sensitivity.
+# Specifies how to convert digital samples to physical sound pressure level.
+# Use the datasheet as a starting point. Ideally calibrate with a known sound level
+MIC_DBFS=-26 # MSM261S4030H0R
+
+AUDIO_SAMPLERATE = 16000
+
+# NOTE: ensure that the pins match your device
+# Example is shown for a LilyGo T-Camera Mic v1.6
+audio_in = I2S(0,
+    sck=Pin(26),
+    ws=Pin(32),
+    sd=Pin(33),
+    mode=I2S.RX,
+    bits=16,
+    format=I2S.MONO,
+    rate=AUDIO_SAMPLERATE,
+    ibuf=40000,
+)
+
+
+# allocate sample arrays
+chunk_duration = 0.125
+chunk_samples = int(AUDIO_SAMPLERATE * chunk_duration)
+mic_samples = array.array('h', (0 for _ in range(chunk_samples))) # int16
+# memoryview used to reduce heap allocation in while loop
+mic_samples_mv = memoryview(mic_samples)
+
+meter = SoundlevelMeter(buffer_size=chunk_samples,
+    samplerate=AUDIO_SAMPLERATE,
+    mic_sensitivity=MIC_DBFS,
+    time_integration=0.125,
+    frequency_weighting='A',
+    summary_interval=20.0,
+)
+
+wlan = network.WLAN(network.STA_IF)
+
+if secrets.BLYNK_AUTH_TOKEN:
+    from iot_blynk import BlynkClient
+    BLYNK_PIN_MAPPING = dict(Leq='v2', Lmin='v3', Lmax='v4')
+    api = BlynkClient(token=secrets.BLYNK_AUTH_TOKEN)
+else:
+    raise ValueError('No IoT API configured')
+
+def audio_ready_callback(arg):
+    start_time = time.ticks_ms()
+
+    meter.process(mic_samples)
+
+    duration = time.ticks_diff(time.ticks_ms(), start_time)
+    if duration >= 125:
+        print('warn-audio-processing-too-slow', time.ticks_ms(), duration)
+
+    # re-trigger audio
+    num_read = audio_in.readinto(mic_samples_mv)
+
+def wifi_connect():
+
+    wlan.active(True)
+    wlan.connect(secrets.WIFI_SSID, secrets.WIFI_PASSWORD)
+
+
+def main():
+    STATUS_UPDATE_INTERVAL = 5.0
+    next_status_update = time.time() + STATUS_UPDATE_INTERVAL
+
+    # Setting a callback function makes the readinto() method non-blocking
+    audio_in.irq(audio_ready_callback)
+    # Start microphone readout. Callback will re-trigger it
+    num_read = audio_in.readinto(mic_samples_mv)
+    print('audio-start', num_read)
+
+    # Start connecting to WiFi
+    wifi_connect()
+
+
+    while True:
+
+        # check our current status
+        if time.time() >= next_status_update:
+            print('main-alive-tick', wlan.status())
+
+            if not wlan.isconnected():
+                print('wifi-reconnect')
+                wlan.active(False)
+                wifi_connect()
+
+            next_status_update = time.time() + STATUS_UPDATE_INTERVAL
+
+        # check for soundlevel data ready to send
+        if len(meter._summary_queue) and wlan.isconnected():
+            print('send-metrics', len(meter._summary_queue))
+            m = meter._summary_queue.pop()
+            vv = { pin: m[key] for key, pin in BLYNK_PIN_MAPPING.items() }
+            values = [ vv ]
+            try:
+                api.post_telemetry(values)
+            except Exception as e:
+                print('post-error', e)
+
+        time.sleep_ms(10)
+
+
+if __name__ == '__main__':
+    main()
+
+
diff --git a/examples/soundlevel_iir/soundlevel_live.py b/examples/soundlevel_iir/soundlevel_live.py
new file mode 100644
index 0000000..d9faae4
--- /dev/null
+++ b/examples/soundlevel_iir/soundlevel_live.py
@@ -0,0 +1,95 @@
+
+"""
+Basic sound level meter example - just prints to the console
+"""
+
+import time
+import struct
+import array
+
+from machine import Pin, I2S
+
+from soundlevel import SoundlevelMeter
+
+# Microphone sensitivity.
+# Specifies how to convert digital samples to physical sound pressure level.
+# Use the datasheet as a starting point. Ideally calibrate with a known sound level
+MIC_DBFS=-26 # MSM261S4030H0R
+
+AUDIO_SAMPLERATE = 16000
+
+# NOTE: ensure that the pins match your device
+# Example is shown for a LilyGo T-Camera Mic v1.6
+audio_in = I2S(0,
+    sck=Pin(26),
+    ws=Pin(32),
+    sd=Pin(33),
+    mode=I2S.RX,
+    bits=16,
+    format=I2S.MONO,
+    rate=AUDIO_SAMPLERATE,
+    ibuf=40000,
+)
+
+
+# allocate sample arrays
+chunk_duration = 0.125
+chunk_samples = int(AUDIO_SAMPLERATE * chunk_duration)
+mic_samples = array.array('h', (0 for _ in range(chunk_samples))) # int16
+# memoryview used to reduce heap allocation in while loop
+mic_samples_mv = memoryview(mic_samples)
+
+meter = SoundlevelMeter(buffer_size=chunk_samples,
+    samplerate=AUDIO_SAMPLERATE,
+    mic_sensitivity=MIC_DBFS,
+    time_integration=0.125,
+    frequency_weighting='A',
+    summary_interval=10.0,
+)
+
+
+def audio_ready_callback(arg):
+    start_time = time.ticks_ms()
+
+    meter.process(mic_samples)
+
+    duration = time.ticks_diff(time.ticks_ms(), start_time)
+    if duration >= 125:
+        print('warn-audio-processing-too-slow', time.ticks_ms(), duration)
+
+    # re-trigger audio
+    num_read = audio_in.readinto(mic_samples_mv)
+
+
+def main():
+
+    next_display_update = 0.0
+
+    # Setting a callback function makes the readinto() method non-blocking
+    audio_in.irq(audio_ready_callback)
+    # Start microphone readout. Callback will re-trigger it
+    num_read = audio_in.readinto(mic_samples_mv)
+    print('audio-start', num_read)
+
+    while True:
+        if time.time() >= next_display_update:
+            soundlevel_db = meter.last_value()
+            if soundlevel_db is not None:
+                print(f'soundlevel shortleq={soundlevel_db:.1f}', end='\r')
+            next_display_update = time.time() + 0.125
+
+        if len(meter._summary_queue):
+            m = meter._summary_queue.pop()
+            print('soundlevels-summary ', end='')
+            for k, v in m.items():
+                print(f'{k}={v:.1f} ', end='')
+            print('')
+
+        time.sleep_ms(10)
+
+
+if __name__ == '__main__':
+    main()
+
+
diff --git a/examples/soundlevel_iir/soundlevel_screen.py b/examples/soundlevel_iir/soundlevel_screen.py
new file mode 100644
index 0000000..c756b13
--- /dev/null
+++ b/examples/soundlevel_iir/soundlevel_screen.py
@@ -0,0 +1,152 @@
+
+import math
+import gc
+import time
+import struct
+import array
+from machine import Pin, I2S, I2C, SoftI2C
+
+from soundlevel import SoundlevelMeter
+
+# mpremote mip install "github:peterhinch/micropython-nano-gui/drivers/ssd1306"
+from color_setup import ssd
+
+# mpremote mip install "github:peterhinch/micropython-nano-gui"
+# On a monochrome display Writer is more efficient than CWriter.
+from gui.core.writer import Writer
+from gui.core.nanogui import refresh
+from gui.widgets.meter import Meter
+from gui.widgets.label import Label
+
+# Fonts
+import gui.fonts.courier20 as fixed
+import gui.fonts.font6 as small
+
+
+def render_display(db : float):
+    start_time = time.ticks_ms()
+
+    ssd.fill(0)
+    #refresh(ssd)
+
+    Writer.set_textpos(ssd, 0, 0)  # In case previous tests have altered it
+    wri = Writer(ssd, fixed, verbose=False)
+    wri.set_clip(False, False, False)
+
+    warn_text = 'Loud!'
+    numfield = Label(wri, 5, 0, wri.stringlen('99.9'))
+    textfield = Label(wri, 40, 34, wri.stringlen(warn_text))
+
+    numfield.value('{:5.1f} dBA'.format(db))
+
+    if db > 75.0:
+        textfield.value(warn_text, True)
+    else:
+        textfield.value('')
+
+    refresh(ssd)
+
+    duration = time.ticks_diff(time.ticks_ms(), start_time)
+    print('render-display-done', duration)
+
+
+
+def flip_display(ssd, vertical=False):
+    """
+    Vertical flip for SSD1306
+    """
+
+    SEGREMAP = 0xA0
+    COMSCANINC = 0xC0
+    COMSCANDEC = 0xC8
+
+    if vertical:
+        ssd.write_cmd(SEGREMAP | 0x01)
+        ssd.write_cmd(COMSCANDEC)
+    else:
+        ssd.write_cmd(SEGREMAP)
+        ssd.write_cmd(COMSCANINC)
+
+
+AUDIO_BUFFER_LENGTH = 40000
+AUDIO_BITDEPTH = 16
+AUDIO_FORMAT = I2S.MONO
+AUDIO_SAMPLERATE = 16000
+
+SCK_PIN = 26
+WS_PIN = 32
+SD_PIN = 33
+
+audio_in = I2S(0,
+    sck=Pin(SCK_PIN),
+    ws=Pin(WS_PIN),
+    sd=Pin(SD_PIN),
+    mode=I2S.RX,
+    bits=AUDIO_BITDEPTH,
+    format=AUDIO_FORMAT,
+    rate=AUDIO_SAMPLERATE,
+    ibuf=AUDIO_BUFFER_LENGTH,
+)
+
+# allocate sample arrays
+chunk_samples = int(AUDIO_SAMPLERATE * 0.125)
+mic_samples = array.array('h', (0 for _ in range(chunk_samples))) # int16
+# memoryview used to reduce heap allocation in while loop
+mic_samples_mv = memoryview(mic_samples)
+
+MIC_DBFS=-26 # MSM261S4030H0R
+
+meter = SoundlevelMeter(buffer_size=chunk_samples,
+    samplerate=AUDIO_SAMPLERATE,
+    mic_sensitivity=MIC_DBFS,
+    time_integration=0.125,
+    frequency_weighting='A',
+    summary_interval=0,
+)
+
+
+def audio_ready_callback(arg):
+    start_time = time.ticks_ms()
+
+    meter.process(mic_samples)
+
+    duration = time.ticks_diff(time.ticks_ms(), start_time)
+    if duration >= 125:
+        print('warn-audio-processing-too-slow', time.ticks_ms(), duration)
+
+    # re-trigger audio
+    num_read = audio_in.readinto(mic_samples_mv)
+
+
+def main():
+
+    next_display_update = 0.0
+
+    flip_display(ssd, vertical=False)
+
+    # Setting a callback function makes the readinto() method non-blocking
+    audio_in.irq(audio_ready_callback)
+
+    # Start microphone readout. Callback will re-trigger it
+    num_read = audio_in.readinto(mic_samples_mv)
+    print('audio-start', num_read)
+
+    while True:
+        if time.time() >= next_display_update:
+            soundlevel_db = meter.last_value()
+            if soundlevel_db is not None:
+                render_display(db=soundlevel_db)
+            next_display_update = time.time() + 0.200
+
+        time.sleep_ms(10)
+
+
+if __name__ == '__main__':
+    main()
+
+
diff --git a/examples/soundlevel_iir/test_soundlevel.py b/examples/soundlevel_iir/test_soundlevel.py
new file mode 100644
index 0000000..43c72c7
--- /dev/null
+++ b/examples/soundlevel_iir/test_soundlevel.py
@@ -0,0 +1,21 @@
+
+import struct
+import array
+
+from soundlevel import SoundlevelMeter
+from soundlevel_file import process_file
+
+def test_soundlevel_time_integration():
+    """ """
+
+    with open('out.csv', 'w') as out:
+        with open('test_burst.wav', 'rb') as f:
+            # FIXME: check that the level drops off at the expected rate
+            pass
+
+# TODO: add test for A weighting filter
+
+
+if __name__ == '__main__':
+    test_soundlevel_time_integration()
+
diff --git a/examples/xor_trees/README.md b/examples/xor_trees/README.md
new file mode 100644
index 0000000..a93a38d
--- /dev/null
+++ b/examples/xor_trees/README.md
@@ -0,0 +1,60 @@
+
+# XOR using Random Forest
+
+The XOR problem stems from the famous Perceptrons (1969) book
+by Marvin Minsky and Seymour Papert.
+It can be trivially solved by a decision-tree or decision-tree ensemble.
+Thus it is a kind of "Hello World" example.
+Simple and an OK sanity check, but not particularly useful.
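+
+For reference, the XOR function on two binary inputs:
+
+| Input A | Input B | XOR |
+|---------|---------|-----|
+| 0       | 0       | 0   |
+| 0       | 1       | 1   |
+| 1       | 0       | 1   |
+| 1       | 1       | 0   |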
+
+## Install requirements
+
+Make sure to have Python 3.10+ installed.
+
+Make sure to have the Unix port of MicroPython 1.23 setup.
+On Windows you can use Windows Subsystem for Linux (WSL), or Docker.
+
+```console
+python -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Run training
+
+This will train a RandomForest model using scikit-learn, and output `xor_model.csv`
+
+```console
+python xor_train.py
+```
+
+## Running on host
+
+```console
+micropython -m mip install https://emlearn.github.io/emlearn-micropython/builds/master/x64_6.3/emlearn_trees.mpy
+
+micropython xor_run.py
+```
+
+## Running on device (ViperIDE)
+
+The fastest and easiest way to install on your device is to use ViperIDE.
+This will install the library and the example code:
+[Run using ViperIDE](https://viper-ide.org/?install=github:emlearn/emlearn-micropython/examples/xor_trees)
+
+
+## Running on device (manually)
+
+NOTE: Make sure you have it running successfully on host first.
+
+This command is for ESP32 (xtensawin).
+For other hardware, replace the string.
+
+```console
+mpremote mip install https://emlearn.github.io/emlearn-micropython/builds/master/xtensawin_6.3/emlearn_trees.mpy
+mpremote cp xor_model.csv :
+```
+
+```console
+mpremote run xor_run.py
+```
diff --git a/examples/xor_trees/package.json b/examples/xor_trees/package.json
new file mode 100644
index 0000000..04f52b6
--- /dev/null
+++ b/examples/xor_trees/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "emlearn-micropython XOR classification example",
+  "version": "0.3.0",
+  "description": "XOR classification Hello World using Random Forest",
+  "author": "Jon Nordby",
+  "license": "MIT",
+  "readme": "github:emlearn/emlearn-micropython/blob/master/examples/xor_trees/README.md",
+  "keywords": "machinelearning,randomforest,classification,decisiontree",
+  "urls": [
+    ["fs:xor_run.py", "xor_run.py"],
+    ["fs:xor_model.csv", "xor_model.csv"]
+  ],
+  "deps": [["emlearn", "master"]]
+}
diff --git a/examples/xor_trees/requirements.txt b/examples/xor_trees/requirements.txt
new file mode 100644
index 0000000..8424ebb
--- /dev/null
+++ b/examples/xor_trees/requirements.txt
@@ -0,0 +1,3 @@
+emlearn>=0.21.0
+scikit-learn>=1.5.0
+setuptools>=75.0.0
diff --git a/examples/xor_trees/xor_run.py b/examples/xor_trees/xor_run.py
index 7d9b20e..5d220cf 100644
--- a/examples/xor_trees/xor_run.py
+++ b/examples/xor_trees/xor_run.py
@@ -1,13 +1,13 @@
 # device/micropython code
 
-import emltrees
+import emlearn_trees
 import array
 
-model = emltrees.new(5, 30, 2)
+model = emlearn_trees.new(5, 30, 2)
 
 # Load a CSV file with the model
 with open('xor_model.csv', 'r') as f:
-    emltrees.load_model(model, f)
+    emlearn_trees.load_model(model, f)
 
 # run it
 max_val = (2**15-1) # 1.0 as int16
diff --git a/src/emlearn_arrayutils/Makefile b/src/emlearn_arrayutils/Makefile
new file mode 100644
index 0000000..f125341
--- /dev/null
+++ b/src/emlearn_arrayutils/Makefile
@@ -0,0 +1,69 @@
+# Location of top-level MicroPython directory
+MPY_DIR = ../../micropython
+
+# Architecture to build for (x86, x64, armv6m, armv7m, xtensa, xtensawin)
+ARCH = x64
+
+# The ABI version for .mpy files
+MPY_ABI_VERSION := 6.3
+
+DIST_DIR := ../../dist/$(ARCH)_$(MPY_ABI_VERSION)
+
+# Name of module
+MOD = emlearn_arrayutils
+
+# Source files (.c or .py)
+SRC = modarrayutils.c
+
+# Stuff to make soft-float work
+# If symbols are undefined, use tools/find_symbols.py to locate object files to add
+SOFTFP_O :=
+SOFTFP_ENABLE := 0
+
+ARM_SOFTFP_O := _arm_cmpsf2.o lesf2.o divsf3.o _thumb1_case_uqi.o _arm_fixsfsi.o floatsisf.o fixsfsi.o eqsf2.o gesf2.o addsf3.o mulsf3.o subsf3.o _clzsi2.o _udivsi3.o
+ARM_SOFTFP_O += _arm_muldivsf3.o _arm_addsubsf3.o
+
+ifeq ($(ARCH), xtensawin)
+    SOFTFP_O += _divsf3.o _ashrdi3.o
+    SOFTFP_ENABLE=1
+endif
+
+ifeq ($(ARCH), armv6m)
+    SOFTFP_O += $(ARM_SOFTFP_O)
+    SOFTFP_ENABLE=1
+endif
+ifeq ($(ARCH), armv7m)
+    SOFTFP_O += $(ARM_SOFTFP_O)
+    SOFTFP_ENABLE=1
+endif
+
+ifeq ($(SOFTFP_ENABLE), 1)
+    SRC_O += $(SOFTFP_O)
+    #CLEAN_EXTRA += $(SOFTFP_O)
+endif
+
+# Releases
+DIST_FILE = $(DIST_DIR)/$(MOD).mpy
+$(DIST_DIR):
+	mkdir -p $@
+
+$(DIST_FILE): $(MOD).mpy $(DIST_DIR)
+	cp $< $@
+
+# Include to get the rules for compiling and linking the module
+include $(MPY_DIR)/py/dynruntime.mk
+
+# CROSS is defined by the included dynruntime.mk
+LIBGCC_FILENAME = $(shell $(CROSS)gcc $(CFLAGS) -print-libgcc-file-name)
+$(info $(LIBGCC_FILENAME))
+
+CFLAGS += -I$(CMSIS_DSP_DIR)/Include
+
+_arm_cmpsf2.o:
+	$(CROSS)ar -x $(LIBGCC_FILENAME) $(SOFTFP_O)
+
+_divsf3.o:
+	$(CROSS)ar -x $(LIBGCC_FILENAME) $(SOFTFP_O)
+
+
+dist: $(DIST_FILE)
diff --git a/src/emlearn_arrayutils/modarrayutils.c b/src/emlearn_arrayutils/modarrayutils.c
new file mode 100644
index 0000000..f6f2861
--- /dev/null
+++ b/src/emlearn_arrayutils/modarrayutils.c
@@ -0,0 +1,139 @@
+// Include the header file to get access to the MicroPython API
+#include "py/dynruntime.h"
+
+#include <string.h>
+
+// memcpy/memset are used by some standard C constructs
+#if !defined(__linux__)
+void *memcpy(void *dst, const void *src, size_t n) {
+    return mp_fun_table.memmove_(dst, src, n);
+}
+void *memset(void *s, int c, size_t n) {
+    return mp_fun_table.memset_(s, c, n);
+}
+
+void NORETURN abort() {
+    while (1) {
+        ;
+    }
+}
+
+int
+__aeabi_idiv0(int return_value) {
+    return return_value;
+}
+
+long long
+__aeabi_ldiv0(long long return_value) {
+    return return_value;
+}
+
+#endif
+
+
+#define max(a,b) \
+({ \
+    __typeof__ (a) _a = (a); \
+    __typeof__ (b) _b = (b); \
+    _a > _b ? _a : _b; \
+})
+
+#define min(a,b) \
+({ \
+    __typeof__ (a) _a = (a); \
+    __typeof__ (b) _b = (b); \
+    _a < _b ? _a : _b; \
+})
+
+float
+map_linear(float x, float in_min, float in_max, float out_min, float out_max) {
+    const float out = ((x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min);
+    return max(out_min, min(out, out_max));
+}
+
+
+int get_typecode_size(char typecode)
+{
+    // TODO: support int8 also
+    if (typecode == 'h') {
+        return 2;
+    } else if (typecode == 'f') {
+        return 4;
+    } else {
+        // unsupported
+        return 0;
+    }
+}
+
+/* linear_map
+
+Map the elements of an array linearly from one range of values to another range.
+Also accepts an output array, which may be of a different type than the input.
diff --git a/src/emlearn_arrayutils/modarrayutils.c b/src/emlearn_arrayutils/modarrayutils.c
new file mode 100644
index 0000000..f6f2861
--- /dev/null
+++ b/src/emlearn_arrayutils/modarrayutils.c
@@ -0,0 +1,139 @@
+// Include the header file to get access to the MicroPython API
+#include "py/dynruntime.h"
+
+#include <string.h>
+
+// memcpy/memset are used by some standard C constructs
+#if !defined(__linux__)
+void *memcpy(void *dst, const void *src, size_t n) {
+    return mp_fun_table.memmove_(dst, src, n);
+}
+void *memset(void *s, int c, size_t n) {
+    return mp_fun_table.memset_(s, c, n);
+}
+
+void NORETURN abort() {
+    while (1) {
+        ;
+    }
+}
+
+int
+__aeabi_idiv0(int return_value) {
+    return return_value;
+}
+
+long long
+__aeabi_ldiv0(long long return_value) {
+    return return_value;
+}
+
+#endif
+
+
+// Statement-expression equivalent of the map_linear() function below
+#define MAP_LINEAR(x, in_min, in_max, out_min, out_max) \
+({ \
+    const float _out = (((x) - (in_min)) * ((out_max) - (out_min)) / ((in_max) - (in_min)) + (out_min)); \
+    max((out_min), min(_out, (out_max))); \
+})
+
+#define max(a,b) \
+({ \
+    __typeof__ (a) _a = (a); \
+    __typeof__ (b) _b = (b); \
+    _a > _b ? _a : _b; \
+})
+
+#define min(a,b) \
+({ \
+    __typeof__ (a) _a = (a); \
+    __typeof__ (b) _b = (b); \
+    _a < _b ? _a : _b; \
+})
+
+float
+map_linear(float x, float in_min, float in_max, float out_min, float out_max) {
+    const float out = ((x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min);
+    return max(out_min, min(out, out_max));
+}
+
+
+int get_typecode_size(char typecode)
+{
+    // TODO: support int8 also
+    if (typecode == 'h') {
+        return 2;
+    } else if (typecode == 'f') {
+        return 4;
+    } else {
+        // unsupported
+        return 0;
+    }
+}
+
+/* linear_map
+
+Map the elements of an array linearly from one range of values to another.
+Also accepts an output array, which may be of a different type than the input.
+
+See map() in Arduino https://www.arduino.cc/reference/en/language/functions/math/map/
+ */
+static mp_obj_t
+arrayutils_linear_map(size_t n_args, const mp_obj_t *args) {
+
+    // Extract arguments
+    mp_buffer_info_t in_bufinfo;
+    mp_get_buffer_raise(args[0], &in_bufinfo, MP_BUFFER_RW);
+    mp_buffer_info_t out_bufinfo;
+    mp_get_buffer_raise(args[1], &out_bufinfo, MP_BUFFER_RW);
+    const float in_min = mp_obj_get_float_to_f(args[2]);
+    const float in_max = mp_obj_get_float_to_f(args[3]);
+    const float out_min = mp_obj_get_float_to_f(args[4]);
+    const float out_max = mp_obj_get_float_to_f(args[5]);
+
+    // Check that the typecodes are supported, before dividing by the element size
+    const int in_size = get_typecode_size(in_bufinfo.typecode);
+    const int out_size = get_typecode_size(out_bufinfo.typecode);
+    if (in_size == 0 || out_size == 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("unsupported array typecode"));
+    }
+
+    // Check lengths
+    const int in_length = in_bufinfo.len / in_size;
+    const int out_length = out_bufinfo.len / out_size;
+    if (in_length != out_length) {
+        mp_raise_ValueError(MP_ERROR_TEXT("length mismatch"));
+    }
+
+    // Actually perform the conversions
+    const char in_type = in_bufinfo.typecode;
+    const char out_type = out_bufinfo.typecode;
+    // FIXME: support f/f and h/h
+    if (in_type == 'h' && out_type == 'f') {
+        const int16_t *in = in_bufinfo.buf;
+        float *out = out_bufinfo.buf;
+        for (int i=0; i<in_length; i++) {
+            out[i] = map_linear(in[i], in_min, in_max, out_min, out_max);
+        }
+    } else if (in_type == 'f' && out_type == 'h') {
+        const float *in = in_bufinfo.buf;
+        int16_t *out = out_bufinfo.buf;
+        for (int i=0; i<in_length; i++) {
+            out[i] = (int16_t)map_linear(in[i], in_min, in_max, out_min, out_max);
+        }
+    } else {
+        mp_raise_ValueError(MP_ERROR_TEXT("unsupported array typecode"));
+    }
+
+    return mp_const_none;
+}
+static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(arrayutils_linear_map_obj, 6, 6, arrayutils_linear_map);
+
+// This is the entry point and is called when the module is imported
+mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *args) {
+    MP_DYNRUNTIME_INIT_ENTRY
+
+    mp_store_global(MP_QSTR_linear_map, MP_OBJ_FROM_PTR(&arrayutils_linear_map_obj));
+
+    MP_DYNRUNTIME_INIT_EXIT
+}
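From MicroPython, usage then looks roughly as follows. This is a sketch: the bare-name import is an assumption based on how the test below calls `linear_map`, but the argument order (input array, output array, input range, output range) matches both the C code above and the test.

```python
# Sketch: normalize int16 samples to floats in -1.0..1.0
import array
from emlearn_arrayutils import linear_map

samples = array.array('h', [-32768, -16384, 0, 16384, 32767])
normalized = array.array('f', (0.0 for _ in samples))

linear_map(samples, normalized, -2**15, 2**15, -1.0, 1.0)
print(list(normalized))
```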
diff --git a/tests/test_arrayutils.py b/tests/test_arrayutils.py
new file mode 100644
--- /dev/null
+++ b/tests/test_arrayutils.py
+import array
+import time
+
+from emlearn_arrayutils import linear_map
+
+
+def assert_almost_equal(a, b, rtol=0.05, errortol=1):
+    assert len(a) == len(b), (len(a), len(b))
+
+    errors = 0
+    for i in range(len(a)):
+        rel = abs(a[i] - b[i]) / (abs(b[i]) + 1e-9)
+        if rel > rtol:
+            errors += 1
+
+    assert errors <= errortol, (errors)
+
+
+def test_arrayutils_linear_map_float_int16():
+
+    length = 2000
+    int16 = array.array('h', (i-length//2 for i in range(length)))
+    flt = array.array('f', (0 for _ in range(length)))
+    out = array.array('h', (0 for _ in range(length)))
+
+    # roundtrip should be (approx) equal
+    start = time.ticks_us()
+    linear_map(int16, flt, -2**15, 2**15, -1.0, 1.0)
+    linear_map(flt, out, -1.0, 1.0, -2**15, 2**15)
+    d = time.ticks_diff(time.ticks_us(), start)
+    print('linear_map int16>float>int16', length, d/1000.0)
+    assert_almost_equal(out, int16)
+
+
+test_arrayutils_linear_map_float_int16()
+
+# Other functionality
+# reinterpret
+#   incorrect lengths, should raise
+#   roundtrip should be approx equal
+#   bytearray <-> int16 LE / BE
+
+# initialize filled
+#arr = make_filled(length, value)
+#length should be correct
+#each element should be equal to value
diff --git a/tests/test_cnn.py b/tests/test_cnn.py
index 805e5aa..1bf439e 100644
--- a/tests/test_cnn.py
+++ b/tests/test_cnn.py
@@ -1,6 +1,6 @@
 import array
 
-import tinymaix_cnn
+import emlearn_cnn
 
 MNIST_MODEL = 'examples/mnist_cnn/mnist_cnn.tmdl'
 MNIST_DATA_DIR = 'examples/mnist_cnn/data/'
@@ -10,7 +10,7 @@ def test_cnn_create():
     model = None
     with open(MNIST_MODEL, 'rb') as f:
         model_data = array.array('B', f.read())
-        model = tinymaix_cnn.new(model_data)
+        model = emlearn_cnn.new(model_data)
 
     # TODO: enable these checks
     #wrong_type = array.array('f', [])
@@ -40,7 +40,7 @@ def test_cnn_mnist():
     model = None
     with open(MNIST_MODEL, 'rb') as f:
         model_data = array.array('B', f.read())
-        model = tinymaix_cnn.new(model_data)
+        model = emlearn_cnn.new(model_data)
 
     correct = 0
     for class_no in range(0, 10):
diff --git a/tests/test_fft.py b/tests/test_fft.py
index db5f9e4..ba39807 100644
--- a/tests/test_fft.py
+++ b/tests/test_fft.py
@@ -1,5 +1,5 @@
 
-import emlfft
+import emlearn_fft
 
 import array
 import gc
@@ -14,7 +14,7 @@ def test_fft_del():
 
     fft_length = 256
 
-    model = emlfft.FFT(fft_length)
+    model = emlearn_fft.FFT(fft_length)
 
     after_new = gc.mem_alloc()
     added = after_new - before_new
@@ -36,9 +36,9 @@ def test_fft_run():
 
     fft_length = 128
     in_real = array.array('f', (0.0 for _ in range(fft_length)))
     in_imag = array.array('f', (0.0 for _ in range(fft_length)))
-    model = emlfft.FFT(fft_length)
+    model = emlearn_fft.FFT(fft_length)
 
-    emlfft.fill(model, fft_length)
+    emlearn_fft.fill(model, fft_length)
 
     model.run(in_real, in_imag)
 
     # FIXME: use some reasonable input data and assert the output data
diff --git a/tests/test_iir.py b/tests/test_iir.py
index 2f932ff..b8f165e 100644
--- a/tests/test_iir.py
+++ b/tests/test_iir.py
@@ -1,5 +1,5 @@
 
-import emliir
+import emlearn_iir
 
 import array
 import gc
@@ -26,7 +26,7 @@ def test_iir_del():
         -1.999111423794296, 0.9991118187443988
     ])
 
-    model = emliir.new(coefficients)
+    model = emlearn_iir.new(coefficients)
 
     after_new = gc.mem_alloc()
     added = after_new - before_new
diff --git a/tests/test_iir_q15.py b/tests/test_iir_q15.py
index 287a84e..43789cc 100644
--- a/tests/test_iir_q15.py
+++ b/tests/test_iir_q15.py
@@ -1,6 +1,6 @@
 
-import eml_iir_q15
+import emlearn_iir_q15
 
 import array
 import gc
@@ -23,7 +23,7 @@ def test_convert_coefficients():
 
     inp = EXAMPLE_COEFFICIENTS
 
-    out = eml_iir_q15.convert_coefficients(inp)
+    out = emlearn_iir_q15.convert_coefficients(inp)
 
 
 def test_iir_del():
@@ -33,11 +33,11 @@ def test_iir_del():
     gc.enable()
     gc.collect()
 
-    coefficients = eml_iir_q15.convert_coefficients(EXAMPLE_COEFFICIENTS)
+    coefficients = emlearn_iir_q15.convert_coefficients(EXAMPLE_COEFFICIENTS)
 
     before_new = gc.mem_alloc()
 
-    model = eml_iir_q15.new(coefficients)
+    model = emlearn_iir_q15.new(coefficients)
 
     after_new = gc.mem_alloc()
     added = after_new - before_new
diff --git a/tests/test_kmeans.py b/tests/test_kmeans.py
index 3301c79..3759ef2 100644
--- a/tests/test_kmeans.py
+++ b/tests/test_kmeans.py
@@ -1,5 +1,5 @@
 
-import emlkmeans
+import emlearn_kmeans
 
 import array
 import gc
@@ -42,7 +42,7 @@ def test_kmeans_two_clusters():
 
     for typecode in ['bytearray', 'B']:
         dataset, centroids = make_two_cluster_data(typecode)
-        assignments = emlkmeans.cluster(dataset, centroids, features=n_features)
+        assignments = emlearn_kmeans.cluster(dataset, centroids, features=n_features)
 
         assert len(assignments) == len(dataset)/n_features
         assert list(assignments) == [0, 0, 1, 1], assignments
@@ -57,7 +57,7 @@ def test_kmeans_many_features():
         dataset = array.array(typecode, (0 for _ in range(n_features*n_samples)) )
         centroids = array.array(typecode, (0 for _ in range(n_features*n_clusters)) )
 
-        assignments = emlkmeans.cluster(dataset, centroids, features=n_features, max_iter=2)
+        assignments = emlearn_kmeans.cluster(dataset, centroids, features=n_features, max_iter=2)
 
         assert len(assignments) == len(dataset)/n_features
         assert min(assignments) >= 0
         assert max(assignments) < n_clusters
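The `*_del` tests above and below (`test_fft_del`, `test_iir_del`, `test_neighbors_del`, `test_trees_del`) all rely on the same leak-check pattern around `gc.mem_alloc()`. In outline, it works like this sketch, where a plain `bytearray` stands in for a native model object:

```python
# Sketch of the leak-check pattern used by the *_del tests (MicroPython gc API)
import gc

gc.enable()
gc.collect()
before_new = gc.mem_alloc()

model = bytearray(1000)  # stand-in for e.g. emlearn_trees.new(...)

after_new = gc.mem_alloc()
added = after_new - before_new
assert added > 0

del model
gc.collect()
freed = after_new - gc.mem_alloc()
assert freed > 0  # deleting the model should release its memory
```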
diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py
index b7802da..f89ef2a 100644
--- a/tests/test_neighbors.py
+++ b/tests/test_neighbors.py
@@ -1,5 +1,5 @@
 
-import emlneighbors
+import emlearn_neighbors
 
 import array
 import gc
@@ -12,7 +12,7 @@ def test_neighbors_del():
     gc.enable()
     gc.collect()
     before_new = gc.mem_alloc()
-    model = emlneighbors.new(30, 5, 1)
+    model = emlearn_neighbors.new(30, 5, 1)
     after_new = gc.mem_alloc()
     added = after_new - before_new
     gc.collect()
@@ -29,7 +29,7 @@ def test_neighbors_trivial():
     n_features = 3
     n_items = 100
     k_neighbors = 2
-    model = emlneighbors.new(n_items, n_features, k_neighbors)
+    model = emlearn_neighbors.new(n_items, n_features, k_neighbors)
 
     item = array.array('h', [0, 0, 0])
 
@@ -54,7 +54,7 @@ def test_neighbors_trivial():
 
 def test_neighbors_get_results():
 
-    model = emlneighbors.new(100, 3, 1)
+    model = emlearn_neighbors.new(100, 3, 1)
 
     data = [
         (array.array('h', [-100, -100, -2]), 0),
diff --git a/tests/test_trees.py b/tests/test_trees.py
index d62eabe..cca418a 100644
--- a/tests/test_trees.py
+++ b/tests/test_trees.py
@@ -1,5 +1,5 @@
 
-import emltrees
+import emlearn_trees
 
 import array
 import gc
@@ -12,7 +12,7 @@ def test_trees_del():
     gc.collect()
     before_new = gc.mem_alloc()
 
-    model = emltrees.new(5, 30, 4)
+    model = emlearn_trees.new(5, 30, 4)
 
     after_new = gc.mem_alloc()
     added = after_new - before_new
@@ -29,11 +29,11 @@ def test_trees_xor():
     Loading simple XOR model and predictions should give correct results
     """
 
-    model = emltrees.new(5, 30, 4)
+    model = emlearn_trees.new(5, 30, 4)
 
     # Load a CSV file with the model
     with open('examples/xor_trees/xor_model.csv', 'r') as f:
-        emltrees.load_model(model, f)
+        emlearn_trees.load_model(model, f)
 
     # run it
     s = 32767 # max int16