From 3e81cf9d8918b0b3e4978894b8e7f3a2764bf9bc Mon Sep 17 00:00:00 2001 From: Boris Barbour Date: Mon, 15 May 2017 00:03:33 +0200 Subject: [PATCH 1/8] Optimised USB receive code - interrupts, no locking, block transfers, DMA access. --- cores/arduino/USB/CDC.cpp | 117 ++++++++++++++++++++-------------- cores/arduino/USB/USBAPI.h | 48 ++++++++++++-- cores/arduino/USB/USBCore.cpp | 30 +++++---- 3 files changed, 130 insertions(+), 65 deletions(-) diff --git a/cores/arduino/USB/CDC.cpp b/cores/arduino/USB/CDC.cpp index dea6bd2b..68501f8c 100644 --- a/cores/arduino/USB/CDC.cpp +++ b/cores/arduino/USB/CDC.cpp @@ -1,4 +1,5 @@ /* Copyright (c) 2011, Peter Barrett +** Copyright (c) 2017, Boris Barbour ** ** Permission to use, copy, modify, and/or distribute this software for ** any purpose with or without fee is hereby granted, provided that the @@ -21,23 +22,12 @@ #ifdef CDC_ENABLED -#define CDC_SERIAL_BUFFER_SIZE 512 - /* For information purpose only since RTS is not always handled by the terminal application */ #define CDC_LINESTATE_DTR 0x01 // Data Terminal Ready #define CDC_LINESTATE_RTS 0x02 // Ready to Send #define CDC_LINESTATE_READY (CDC_LINESTATE_RTS | CDC_LINESTATE_DTR) -struct ring_buffer -{ - uint8_t buffer[CDC_SERIAL_BUFFER_SIZE]; - volatile uint32_t head; - volatile uint32_t tail; -}; - -ring_buffer cdc_rx_buffer = { { 0 }, 0, 0}; - typedef struct { uint32_t dwDTERate; @@ -173,45 +163,45 @@ void Serial_::end(void) void Serial_::accept(void) { - static uint32_t guard = 0; - - // synchronized access to guard - do { - if (__LDREXW(&guard) != 0) { - __CLREX(); - return; // busy - } - } while (__STREXW(1, &guard) != 0); // retry until write succeed - + // Use fifocon to synchronise. Leave if there is no data. + if (!Is_udd_fifocon(CDC_RX)) return; + // This rearms interrupt, but FIFO must be released before it + // can retrigger. Moved here from the interrupt service + // routine because we may come to this function directly. + if (Is_udd_out_received(CDC_RX)) udd_ack_out_received(CDC_RX); ring_buffer *buffer = &cdc_rx_buffer; - uint32_t i = (uint32_t)(buffer->head+1) % CDC_SERIAL_BUFFER_SIZE; - - // if we should be storing the received character into the location - // just before the tail (meaning that the head would advance to the - // current location of the tail), we're about to overflow the buffer - // and so we don't write the character or advance the head. - while (i != buffer->tail) { - uint32_t c; - if (!USBD_Available(CDC_RX)) { - udd_ack_fifocon(CDC_RX); - break; - } - c = USBD_Recv(CDC_RX); - // c = UDD_Recv8(CDC_RX & 0xF); - buffer->buffer[buffer->head] = c; - buffer->head = i; - - i = (i + 1) % CDC_SERIAL_BUFFER_SIZE; + uint32_t b = CDC_SERIAL_BUFFER_SIZE; + uint32_t u = UDD_FifoByteCount(CDC_RX); + uint32_t s = b - (uint32_t)(buffer->head - buffer->tail); + uint32_t r = min(s, u); + while(r) { + // May only be able to fill to the end of the buffer in first call. + uint32_t h = (buffer->head)%b; + uint32_t g = min(r, b-h); + UDD_Recv(CDC_RX, &(buffer->buffer[h]), g); + r -= g; + buffer->head += g; } + // Don't release FIFO if not all data was transferred. + if (!UDD_FifoByteCount(CDC_RX)) UDD_ReleaseRX(CDC_RX); +} - // release the guard - guard = 0; +void Serial_::enableInterrupts() +{ + udd_enable_out_received_interrupt(CDC_RX); + udd_enable_endpoint_interrupt(CDC_RX); +} + +void Serial_::disableInterrupts() +{ + udd_disable_out_received_interrupt(CDC_RX); + udd_disable_endpoint_interrupt(CDC_RX); } int Serial_::available(void) { ring_buffer *buffer = &cdc_rx_buffer; - return (unsigned int)(CDC_SERIAL_BUFFER_SIZE + buffer->head - buffer->tail) % CDC_SERIAL_BUFFER_SIZE; + return (unsigned int)(buffer->head - buffer->tail); } int Serial_::availableForWrite(void) @@ -231,7 +221,8 @@ int Serial_::peek(void) } else { - return buffer->buffer[buffer->tail]; + uint32_t b = CDC_SERIAL_BUFFER_SIZE; + return buffer->buffer[(buffer->tail)%b]; } } @@ -239,6 +230,11 @@ int Serial_::read(void) { ring_buffer *buffer = &cdc_rx_buffer; + // Give "accept" a chance to catch up if data is ready. + // Interrupt shouldn't be able to fire in this condition. + //if (Is_udd_fifocon(CDC_RX)) + accept(); + // if the head isn't ahead of the tail, we don't have any characters if (buffer->head == buffer->tail) { @@ -246,14 +242,41 @@ int Serial_::read(void) } else { - unsigned char c = buffer->buffer[buffer->tail]; - buffer->tail = (unsigned int)(buffer->tail + 1) % CDC_SERIAL_BUFFER_SIZE; - if (USBD_Available(CDC_RX)) - accept(); + uint32_t b = CDC_SERIAL_BUFFER_SIZE; + unsigned char c = buffer->buffer[(buffer->tail)%b]; + buffer->tail++; return c; } } +int Serial_::read(uint8_t *d, size_t s) +{ + ring_buffer *buffer = &cdc_rx_buffer; + uint32_t b = CDC_SERIAL_BUFFER_SIZE; + uint32_t a = (uint32_t) (buffer->head - buffer->tail); + // Number of bytes to read is the smaller of those available and those requested. + uint32_t r = min(a, s); + uint32_t k = r; + // May reach end of buffer before completing transfer. + while(r) { + uint32_t tm = (buffer->tail)%b; + uint32_t g = min(r, b-tm); + for (int i = 0 ; i < g; i++) { + d[i] = buffer->buffer[tm + i]; + } + d += g; + r -= g; + buffer->tail += g; + } + // Give "accept" a chance to catch up if data is ready. + // Interrupt shouldn't be able to fire in this condition. + // if (Is_udd_fifocon(CDC_RX)) { + if ((a-k) < b) accept(); + //} + return k; +} + + void Serial_::flush(void) { USBD_Flush(CDC_TX); diff --git a/cores/arduino/USB/USBAPI.h b/cores/arduino/USB/USBAPI.h index 0a706342..07423c36 100644 --- a/cores/arduino/USB/USBAPI.h +++ b/cores/arduino/USB/USBAPI.h @@ -1,5 +1,7 @@ /* - Copyright (c) 2012 Arduino. All right reserved. + Copyright (c) 2012 Arduino. + Copyright (c) 2017 Boris Barbour + All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -21,7 +23,6 @@ #if defined __cplusplus -#include "RingBuffer.h" #include "Stream.h" //================================================================================ @@ -40,15 +41,53 @@ class USBDevice_ }; extern USBDevice_ USBDevice; +// Best to use a power of 2 to enable simplified modulo calculations +// (the compiler will automatically use &). USB2 packets can contain +// up to 1kb (isochronous mode), but usually less. The FIFOs seem to +// be 512 bytes on the Due/SAM. +#define CDC_SERIAL_BUFFER_SIZE 512 + +// This could go into a separate header and file, but it's small and +// users may need access to the size definition. Note that this is +// (confusingly) distinct from the general RingBuffer declared in +// RingBuffer.h. The implementation has been changed. Instead of +// continuously taking the modulus of head and tail, we now have +// ever-increasing longs, whose modulus is taken only to address the +// buffer. This may add a small overhead, but ensures that the tail +// overrunning the head can be detected even with interrupt and DMA +// applications. As a minor side-effect, the buffer can hold one more +// byte, since head==tail (empty) can now be distinguished from +// head==tail+size (full). The use of 64-bit uints ensures that they +// never overflow in the lifetime of the universe; 32-bit uints might +// do so in a matter of hours or days at top rates. +struct ring_buffer +{ + uint8_t buffer[CDC_SERIAL_BUFFER_SIZE]; + volatile uint64_t head; + volatile uint64_t tail; +}; + //================================================================================ //================================================================================ // Serial over CDC (Serial1 is the physical port) class Serial_ : public Stream { -private: - RingBuffer *_cdc_rx_buffer; public: + // The ring buffer implementation is public to allow user DMA access. + ring_buffer cdc_rx_buffer = { { 0 }, 0, 0}; + // Standard arduino only schedules interrupts between "loop" + // iterations, so this is the default and the user will be + // responsible for scheduling reception of data via "accept" + // directly or indirectly by calling one of the "read" + // functions, which call "accept" (the read functions are not + // used during DMA applications). Even when interrupts are + // enabled, if the receive buffer doesn't have space when the + // interrupt service routine is called, it can be necessary to + // call "accept" manually to complete the transfer and prevent + // blocking. + void enableInterrupts(); + void disableInterrupts(); void begin(uint32_t baud_count); void begin(uint32_t baud_count, uint8_t config); void end(void); @@ -58,6 +97,7 @@ class Serial_ : public Stream virtual void accept(void); virtual int peek(void); virtual int read(void); + virtual int read(uint8_t *d, size_t t); virtual void flush(void); virtual size_t write(uint8_t); virtual size_t write(const uint8_t *buffer, size_t size); diff --git a/cores/arduino/USB/USBCore.cpp b/cores/arduino/USB/USBCore.cpp index e7652016..77b2c372 100644 --- a/cores/arduino/USB/USBCore.cpp +++ b/cores/arduino/USB/USBCore.cpp @@ -1,4 +1,5 @@ // Copyright (c) 2010, Peter Barrett +// Copyright (c) 2017, Boris Barbour /* ** Permission to use, copy, modify, and/or distribute this software for ** any purpose with or without fee is hereby granted, provided that the @@ -137,21 +138,22 @@ uint32_t USBD_Available(uint32_t ep) // Non Blocking receive // Return number of bytes read + +// Caller must release empty FIFO; release was moved out to +// prevent a race condition when interrupts are enabled. The +// "accept" function will release as necessary if used +// (recommended). uint32_t USBD_Recv(uint32_t ep, void* d, uint32_t len) { if (!_usbConfiguration) return -1; - LockEP lock(ep); uint32_t n = UDD_FifoByteCount(ep & 0xF); len = min(n,len); n = len; uint8_t* dst = (uint8_t*)d; while (n--) *dst++ = UDD_Recv8(ep & 0xF); - if (len && !UDD_FifoByteCount(ep & 0xF)) // release empty buffer - UDD_ReleaseRX(ep & 0xF); - return len; } @@ -159,10 +161,12 @@ uint32_t USBD_Recv(uint32_t ep, void* d, uint32_t len) uint32_t USBD_Recv(uint32_t ep) { uint8_t c; - if (USBD_Recv(ep & 0xF, &c, 1) != 1) - return -1; - else - return c; + if (USBD_Recv(ep & 0xF, &c, 1) == 0) + c = -1; + if (UDD_FifoByteCount(ep & 0xF) == 0) { + UDD_ReleaseRX(ep & 0xF); + } + return c; } // Space in send EP @@ -620,13 +624,11 @@ static void USB_ISR(void) } #ifdef CDC_ENABLED - if (Is_udd_endpoint_interrupt(CDC_RX)) + // Seems safer than previous and less specific Is_udd_endpoint_interrupt. + if (Is_udd_out_received(CDC_RX)) { - udd_ack_out_received(CDC_RX); - - // Handle received bytes - if (USBD_Available(CDC_RX)) - SerialUSB.accept(); + //Handle received bytes + SerialUSB.accept(); } if (Is_udd_sof()) From e8545af06492113a66f19965c67bcf11bd65e3f9 Mon Sep 17 00:00:00 2001 From: Boris Barbour Date: Mon, 15 May 2017 00:21:32 +0200 Subject: [PATCH 2/8] Example of high-speed bidirectional streaming with DAC, ADC DMA. Added README. --- README.md | 27 ++ .../bidirectional_streaming.ino | 257 ++++++++++++++++++ .../bidirectional_streaming.py | 99 +++++++ 3 files changed, 383 insertions(+) create mode 100644 README.md create mode 100644 examples/bidirectional_streaming/bidirectional_streaming.ino create mode 100644 examples/bidirectional_streaming_py/bidirectional_streaming.py diff --git a/README.md b/README.md new file mode 100644 index 00000000..7ff52d58 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +Streaming data over the native USB to the Arduino Due was disappointingly slow, about 60kb/s, yet transmission from Arduino to host computer could run at Mb/s, suggesting that the USB connection was not limiting. Inspection of the code showed that data was received byte by byte through a chain of functions with massively redundant checks. + +This repository is a fork of the official code and contains optimisations of the USB code. With them, it is possible to perform round-trip streaming of data to and from the Arduino Due at 1.8Mb/s at least and probably much faster (the Arduino code was not limiting in these tests driven by a simple python script on the host computer). + +A simple example application making use of this code is included in the "examples" directory. A simple (linux) python script streams arbitrary blocks of data from the computer to the Arduino DAC (using DMA) and simultaneously receives two channels of ADC data (again using DMA). The parameters in the scripts set a 500kHz clock for both DAC and ADC (each channel), but the code could stream at twice the rate without difficulty. + +Changes to the code +=================== + +A (non-blocking) overloaded read function that accepts as parameters a buffer and size is now provided as a member of the SerialUSB class. If neither of the read functions is used, for instance during a DMA application, the user may need to call "SerialUSB.accept()" periodically if there is a danger that the buffer will sometimes be too full to accept a full FIFO (512 bytes) of data upon interrupt, as this will cause reception to block. The CDC_SERIAL_BUFFER_SIZE can be increased from the original 512 bytes to reduce this risk. + +The SerialUSB (Serial_) class has been modified to remove all mention of the RingBuffer used elsewhere in the Arduino code but NOT here. This was confusing at best. + +The ring_buffer that IS used in SerialUSB has been made a member of the class. This enables access to it during DMA applications (e.g. streaming to DAC), eliminating needless copy operations. + +The implementation of the ring_buffer has been altered slightly for easy compatibility with DMA applications; head and tail are now ever-increasing 64-bit integers. + +The Arduino Due code uses a poor-man's interrupts, scheduling them between loop iterations. Proper USB interrupts can now be enabled via new member functions and interrupt-driven code can handle most, and in some scenarios all, data reception. + +The code has been reworked to remove the need for locking even with interrupts enabled. This is achieved by using the FIFO signals for synchronisation. + +Block transfers are now used throughout the reception chain and their overhead has been minimised. The accept function has been rewritten. + +The changes to the code are under the same licences as the original files. + + + diff --git a/examples/bidirectional_streaming/bidirectional_streaming.ino b/examples/bidirectional_streaming/bidirectional_streaming.ino new file mode 100644 index 00000000..5ef25768 --- /dev/null +++ b/examples/bidirectional_streaming/bidirectional_streaming.ino @@ -0,0 +1,257 @@ +#include + +// Requires modified USB files: CDC.cpp, USBCore.cpp, USBAPI.h. + +// Thanks to the following. +// https://github.com/ivanseidel/DueTimer/issues/11 +// https://github.com/OliviliK/DueTC +// Note that in the following I've corrected an error in updating buffer pointers. +// https://gist.github.com/pklaus/5921022 +// https://forum.arduino.cc/index.php?topic=224672.0 + +// Header information: +// nS: Number of DAC samples +// adcF: ADC clock in Hz +// clkDiv: Divider for conversion timer (base clock is at 42 MHz) +// It seems that ADC clock frequency must be at least 42x the +// conversion clock frequency. +uint32_t header [3]; + +// Quarter buffer size is in bytes. +const int qsize = CDC_SERIAL_BUFFER_SIZE/4; +// Pointer to the SerialUSB ring_buffer. +ring_buffer * rxb = &(SerialUSB.cdc_rx_buffer); +// Number of 2-byte samples remaining to queue. +uint32_t dacrem; +// Keep track of DMA buffer sizes **in samples**. +uint32_t dacbs1, dacbs2; +// Receive count in samples. +uint32_t rxc = 0; + +bool live = false; +const uint16_t hold = 2047; +#define HOLDBUF_SIZE 256 +uint16_t holdbuf[HOLDBUF_SIZE]; + +// Size of each buffer in samples. +const int bufsize = 1024; +// Number of buffers. Use a power of two to enable "and" which is quicker than generic "mod". +const int nbuf = 4; +volatile int adcbufn, adcobufn; +// Buffers of bufsize readings +uint16_t adcbuf[nbuf][bufsize]; +// Number of ADC channels. +uint32_t nadcChan = 2; + +// ADC samples remaining to be scheduled. +uint32_t adcrem; +// ADC samples transmitted. +uint32_t txc; + +bool errorFlag = 0; + +void blockingRead(uint8_t * b, uint32_t n) { + while (n) { + n -= SerialUSB.read(b, n); + } +} + +void blockingWrite(uint8_t * b, uint32_t n) { + while (n) { + n -= SerialUSB.write(b, n); + } +} + +void signalLED(int o = 1) { + digitalWrite(13, o); +} + +void ADC_Handler(){ // move DMA pointers to next buffer + int f=ADC->ADC_ISR; + // 1<<27 is ENDRX buffer. + if (f&(1<<27)){ + adcbufn++; + ADC->ADC_RNPR=(uint32_t)adcbuf[(adcbufn+1)%nbuf]; + ADC->ADC_RNCR=bufsize; + if (adcrem > bufsize) { + adcrem -= bufsize; + } else { + adcrem = 0; + } + if (adcbufn > adcobufn + nbuf) errorFlag = true; + } +} + +void setupADC(uint32_t adcF=42000000) { + // Prepare analog input. + analogReadResolution(12); + pmc_enable_periph_clk(ID_ADC); + // Max 42000000 for ADC frequency. Seems to need 42 cycles per conversion. + adc_init(ADC, SystemCoreClock, adcF, ADC_STARTUP_FAST); + // Disable hardware triggers. + ADC->ADC_MR &= ~1; + // 0x80000000 = 1<<31 sets USEQ; User Sequence? + ADC->ADC_MR |= ADC_TRIG_TIO_CH_0 | 1<<31; + // Alternate A0 and A1. Twisted SAM -> DUE channel mapping. + ADC->ADC_CHER=0xC0; + ADC->ADC_SEQR1 = 0x67000000; + + adcbufn = 0; + adcobufn = 0; + txc = 0; + ADC->ADC_RPR = (uint32_t)adcbuf[0]; // DMA buffer + ADC->ADC_RCR = bufsize; + adcrem -= bufsize; + ADC->ADC_RNPR = (uint32_t)adcbuf[1]; // next DMA buffer + ADC->ADC_RNCR = bufsize; + adcrem -= bufsize; + // I think this is PDC receiver transfer enable RXTEN. + ADC->ADC_PTCR=1; + // Enable hardware trigger. + ADC->ADC_MR |= 1; + // Enable trigger after writing to RCR or RNCR, otherwise spurious ENDRX interrupt + // is raised. + ADC->ADC_IER=1<<27; + ADC->ADC_IDR=~(1<<27); + NVIC_EnableIRQ(ADC_IRQn); +} + +// A non-zero value must be written to either TCR or TNCR to clear ENDTX interrupt +// and prevent immediate refiring: +// http://community.atmel.com/forum/how-can-you-do-peripheral-dma-fixed-number-blocks +void DACC_Handler() { + if (dacc_get_interrupt_status(DACC) & DACC_ISR_ENDTX) { + if (live) { + // We just got through a buffer. + rxb->tail += dacbs1*2; + rxc += dacbs1; + if (rxb->tail > rxb->head) errorFlag = true; + dacbs1 = dacbs2; + if (dacbs1 == 0) live = false; + if (dacrem) { + DACC->DACC_TNPR = (uint32_t) &(rxb->buffer[(rxb->tail + dacbs2*2)%CDC_SERIAL_BUFFER_SIZE]); + dacbs2 = min(dacrem, qsize/2); + DACC->DACC_TNCR = dacbs2; + dacrem -= dacbs2; + } else { + DACC->DACC_TNPR = (uint32_t) &holdbuf; + DACC->DACC_TNCR = HOLDBUF_SIZE; + dacbs2 = 0; + } + } else { + DACC->DACC_TNPR = (uint32_t) &holdbuf; + DACC->DACC_TNCR = HOLDBUF_SIZE; + } + } +} + +void setupDAC(uint32_t clkDiv=84) { + // Prepare analog input on DAC0. + pinMode(DAC0, OUTPUT); + analogWriteResolution(12); + pmc_enable_periph_clk (DACC_INTERFACE_ID) ; // start clocking DAC + dacc_reset(DACC); + // Controls whether "word" mode (32-bit or 16-bit transfers?). 0 -> 16 bits. + dacc_set_transfer_mode(DACC, 0); + dacc_set_power_save(DACC, 0, 1); // sleep = 0, fastwkup = 1 + dacc_set_analog_control(DACC, DACC_ACR_IBCTLCH0(0x02) | DACC_ACR_IBCTLCH1(0x02) | DACC_ACR_IBCTLDACCORE(0x01)); + dacc_set_trigger(DACC, 1); // 1 means TC0. + dacc_set_channel_selection(DACC, 0); + dacc_enable_channel(DACC, 0); + + DACC->DACC_TPR = (uint32_t) &holdbuf; // DMA buffer + DACC->DACC_TCR = HOLDBUF_SIZE; + DACC->DACC_TNPR = (uint32_t) &holdbuf; + DACC->DACC_TNCR = HOLDBUF_SIZE; + DACC->DACC_PTCR = 0x00000100; + dacc_enable_interrupt(DACC, DACC_IER_ENDTX); + NVIC_EnableIRQ(DACC_IRQn); +} + +void prepareOutput() { + rxc = 0; + DACC->DACC_TPR = (uint32_t) &(rxb->buffer[rxb->tail%CDC_SERIAL_BUFFER_SIZE]); // DMA buffer + uint32_t fillq = (qsize - rxb->tail%qsize)/2; + dacbs1 = min(dacrem, fillq); + DACC->DACC_TCR = dacbs1; + dacrem -= dacbs1; + if (dacrem) { + DACC->DACC_TNPR = (uint32_t) &(rxb->buffer[(rxb->tail+(dacbs1*2))%CDC_SERIAL_BUFFER_SIZE]); + dacbs2 = min(dacrem, qsize/2); + DACC->DACC_TNCR = dacbs2; + dacrem -= dacbs2; + } else { + dacbs2 = 0; + } + live = true; +} + +void stopTimer() { + // Stop timer. + REG_TC0_CCR0=2; +} + +void startTimer(uint32_t clkDiv=42) { + setupTC_Pin2_Timing(clkDiv,0); +} + +void setup() { + // Error signal. + pinMode(13, OUTPUT); + // For checking TC0. + pinMode(2, OUTPUT); + // Output trigger if required. + pinMode(52, OUTPUT); + digitalWrite(52, LOW); + // Input trigger if required. + pinMode(22, INPUT); + digitalWrite(13, LOW); + for (int i = 0; i < 256; i++) { + holdbuf[i] = hold; + } + setupDAC(); + startTimer(); + SerialUSB.begin(0); + while(!SerialUSB); + SerialUSB.enableInterrupts(); +} + +void loop() { + // Get header + blockingRead((uint8_t*) header, 12); + // Number of samples. + uint32_t nS = header[0]; + // ADC clock frequency. + uint32_t adcF = header[1]; + // Conversion clock frequency divider. + uint32_t clkDiv = header[2]; + blockingWrite((uint8_t*) &nadcChan, 4); + dacrem = nS; + adcrem = nadcChan*nS; + // Make sure that DAC data is primed. + uint32_t threshold = min(nS*2, 3*CDC_SERIAL_BUFFER_SIZE/4); + while(SerialUSB.available() < threshold) SerialUSB.accept(); + errorFlag = false; + digitalWrite(13, LOW); + // Wait for trigger on pin 22. The following code will delay start somewhat, + // but the DAC output would drift if not refreshed. + // while (digitalRead(22)); // Or !digitalRead(22) + stopTimer(); + prepareOutput(); + setupADC(adcF); + digitalWrite(52, HIGH); + startTimer(clkDiv); + // Transfer until all input and output is accounted for. + while (txc < nadcChan*nS | rxc < nS) { + SerialUSB.accept(); + if (adcbufn > adcobufn) { + uint32_t n = min((nadcChan*nS-txc), bufsize); + blockingWrite((uint8_t*) adcbuf[adcobufn%nbuf], n*2);// + txc += n; + adcobufn += 1; + } + if (errorFlag) signalLED(); + } + digitalWrite(52, LOW); + SerialUSB.write((uint8_t*) &errorFlag, 1);// +} diff --git a/examples/bidirectional_streaming_py/bidirectional_streaming.py b/examples/bidirectional_streaming_py/bidirectional_streaming.py new file mode 100644 index 00000000..38ce9017 --- /dev/null +++ b/examples/bidirectional_streaming_py/bidirectional_streaming.py @@ -0,0 +1,99 @@ + +import numpy as np +import serial +import struct +import time +import glob +import hashlib + +currentport = glob.glob('/dev/ttyACM*')[0] +print "Port: ", currentport + +port = serial.Serial(currentport, baudrate=0, parity = serial.PARITY_NONE, stopbits = serial.STOPBITS_ONE, bytesize = serial.EIGHTBITS, timeout=0) + +savepath = '/users/nsr/barbour/junk/' + +# Just for testing purposes. +testdata = np.ones(1000000) +for i in range(len(testdata)): + testdata[i] = i%4096 + +def blockingWrite(data): + n = len(data) + while n: + n -= port.write(data) + +def blockingRead(n): + r = '' + while n: + s = port.read(n) + r += s + n -= len(s) + return r + +class sound: + def __init__(self, data=testdata, clkDiv=84, adcF=21000000): + self.nS = len(data) + self.clkDiv = clkDiv + self.adcF = adcF + self.txd = struct.pack("<%sH"%(len(data)), *data) + # These are to regulate flow through the port. It seems it can + # block if too many data are written while reading. + self.maxOut = 65536 + self.maxIn = self.maxOut + self.maxAdvance = 65536 + self.hash = hashlib.md5(self.txd).hexdigest() + print "Sound has hash: ", self.hash + def episode(self): + # Number of bytes to transmit. + nb = len(self.txd) + tstamp = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()) + print tstamp + print self.hash + h = struct.pack("<3I", self.nS, self.adcF, self.clkDiv) + blockingWrite(h) + # DAC and ADC... + fname = "DnA_"+tstamp+".dat" + # Leave as string for immediate writing. + nchan = blockingRead(4) + preamble = fname + h + nchan + # Later we need the number. + nchan = struct.unpack(" Date: Mon, 15 May 2017 23:46:59 +0200 Subject: [PATCH 3/8] Add comments about file locations to example. --- .../bidirectional_streaming/bidirectional_streaming.ino | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/bidirectional_streaming/bidirectional_streaming.ino b/examples/bidirectional_streaming/bidirectional_streaming.ino index 5ef25768..6fdf3412 100644 --- a/examples/bidirectional_streaming/bidirectional_streaming.ino +++ b/examples/bidirectional_streaming/bidirectional_streaming.ino @@ -1,6 +1,11 @@ #include -// Requires modified USB files: CDC.cpp, USBCore.cpp, USBAPI.h. +// Requires modified USB files: CDC.cpp, USBCore.cpp, USBAPI.h. +// These are found in the "board support package". On linux they usually +// install in a hidden file in the user's home directory, somewhere under: +// ~/.arduino15/ +// and the above files are found in: +// .arduino15/packages/arduino/hardware/sam/1.6.11/cores/arduino/USB/ // Thanks to the following. // https://github.com/ivanseidel/DueTimer/issues/11 From 57e68d239793963b19bf70231ffb6885b0989a17 Mon Sep 17 00:00:00 2001 From: Boris BARBOUR Date: Mon, 4 Mar 2019 23:51:27 +0100 Subject: [PATCH 4/8] Fixed bug in blocking r/w functions in .ino; better examples. --- README.md | 61 ++- examples/bidirectional_streaming/bidi.cpp | 137 ++++++ .../bidirectional_streaming/DueTC.cpp | 393 ++++++++++++++++++ .../bidirectional_streaming/DueTC.h | 70 ++++ .../bidirectional_streaming.ino | 12 +- examples/bidirectional_streaming/display.py | 25 ++ examples/bidirectional_streaming/genfile.py | 15 + .../bidirectional_streaming.py | 99 ----- 8 files changed, 703 insertions(+), 109 deletions(-) create mode 100644 examples/bidirectional_streaming/bidi.cpp create mode 100644 examples/bidirectional_streaming/bidirectional_streaming/DueTC.cpp create mode 100644 examples/bidirectional_streaming/bidirectional_streaming/DueTC.h rename examples/bidirectional_streaming/{ => bidirectional_streaming}/bidirectional_streaming.ino (97%) create mode 100644 examples/bidirectional_streaming/display.py create mode 100644 examples/bidirectional_streaming/genfile.py delete mode 100644 examples/bidirectional_streaming_py/bidirectional_streaming.py diff --git a/README.md b/README.md index 7ff52d58..2384b3e3 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,14 @@ -Streaming data over the native USB to the Arduino Due was disappointingly slow, about 60kb/s, yet transmission from Arduino to host computer could run at Mb/s, suggesting that the USB connection was not limiting. Inspection of the code showed that data was received byte by byte through a chain of functions with massively redundant checks. +Streaming data over the native USB to the Arduino Due was disappointingly slow, about 60kb/s, yet transmission from Arduino to host computer could run at Mb/s, suggesting that the USB connection was not limiting. Inspection of the code showed that data was received byte by byte through a chain of functions with an overhead of massively redundant checks. This repository is a fork of the official code and contains optimisations of the USB code. With them, it is possible to perform round-trip streaming of data to and from the Arduino Due at 1.8Mb/s at least and probably much faster (the Arduino code was not limiting in these tests driven by a simple python script on the host computer). -A simple example application making use of this code is included in the "examples" directory. A simple (linux) python script streams arbitrary blocks of data from the computer to the Arduino DAC (using DMA) and simultaneously receives two channels of ADC data (again using DMA). The parameters in the scripts set a 500kHz clock for both DAC and ADC (each channel), but the code could stream at twice the rate without difficulty. +Only 3 files have been changed. If you don't wish to deal with the whole git repository, the files CDC.cpp, USBAPI.h and USBCore.cpp in the ArduinoCore-sam/cores/arduino/USB/ directory can be transferred into your local board package, which under my linux is found here: +~/.arduino15/packages/arduino/hardware/sam//cores/arduino/USB/ -Changes to the code -=================== +A pull request has been submitted but not acted upon. + +Changes to the arduino libraries +================================ A (non-blocking) overloaded read function that accepts as parameters a buffer and size is now provided as a member of the SerialUSB class. If neither of the read functions is used, for instance during a DMA application, the user may need to call "SerialUSB.accept()" periodically if there is a danger that the buffer will sometimes be too full to accept a full FIFO (512 bytes) of data upon interrupt, as this will cause reception to block. The CDC_SERIAL_BUFFER_SIZE can be increased from the original 512 bytes to reduce this risk. @@ -13,7 +16,7 @@ The SerialUSB (Serial_) class has been modified to remove all mention of the Rin The ring_buffer that IS used in SerialUSB has been made a member of the class. This enables access to it during DMA applications (e.g. streaming to DAC), eliminating needless copy operations. -The implementation of the ring_buffer has been altered slightly for easy compatibility with DMA applications; head and tail are now ever-increasing 64-bit integers. +The implementation of the ring_buffer has been altered slightly to facilitate DMA applications; head and tail are now ever-increasing 64-bit integers. The Arduino Due code uses a poor-man's interrupts, scheduling them between loop iterations. Proper USB interrupts can now be enabled via new member functions and interrupt-driven code can handle most, and in some scenarios all, data reception. @@ -23,5 +26,53 @@ Block transfers are now used throughout the reception chain and their overhead h The changes to the code are under the same licences as the original files. +Examples +======== + +Speed test +---------- + +This a simple speed test. The Arduino sketch just reads available data on the native USB serial port using the new block read member and sends it back, in a loop. On the host computer, a large array is written to and read back from the serial tty by a short C++ program making use of the "select" call for efficient sequencing of the i/o operations. The port is specified as a parameter ("0" in the example). + +$ g++ -O3 -o speed_test speed_test.cpp +$ time ./speed_test 0 +Test round-trip streaming with 100000000 bytes. +/dev/ttyACM0 +Arrays equal! + +real 0m37.852s +user 0m0.288s +sys 0m1.576s + +100 Mb in ~40s is 2.5 Mb/s. + +Bidirectional streaming +----------------------- + +This example is much more involved, but reflects the motivation for this project. An array is streamed from the host computer to the arduino, where it is transferred to the DAC by DMA. At the same time, two ADC channels are acquired at the same total frequency and streamed back to the host. Here the speed is limited by the maximum ADC rate of 1 MHz, corresponding to an arduino -> host data rate of 2 Mb/s (with 1 Mb/s flowing in the opposite direction). A timer library is included from https://github.com/OliviliK/DueTC +A file is generated by the python script that contains a few control parameters in the header and data for the DAC, as well as space for the ADC data to be acquired, an error flag and a timestamp. The file is memory-mapped to enable simultaneous i/o. The path to the data file and the tty port are given as parameters. + +Connect DAC0 to A0 and GND to A1 (for instance). + +$ python genfile.py +$ time ./bidi test.dat /dev/ttyACM0 +test.dat +10000000 42000000 42 2 +2019-03-04_01:05:30 + +real 0m20.111s +user 0m0.020s +sys 0m1.088s +$python display.py +header (10000000, 42000000, 42, 2) +error 0 +timestamp 2019-03-04_01:05:30 + + +If you find the streaming to be unreliable (an error is raised), there is probably a bottleneck somewhere. Things to try: +- use an SSD instead of a normal disk +- modify the host code to work only in memory +- increase the sizes of the buffers in the arduino library code or for the DAC, ADC buffers. +The examples are licensed in the public domain. diff --git a/examples/bidirectional_streaming/bidi.cpp b/examples/bidirectional_streaming/bidi.cpp new file mode 100644 index 00000000..f0a4c1d0 --- /dev/null +++ b/examples/bidirectional_streaming/bidi.cpp @@ -0,0 +1,137 @@ +// g++ -O3 -o bidi bidi.cpp +// Useful resource! https://en.wikibooks.org/wiki/Serial_Programming/termios +// Also used example code here: https://www.tldp.org/HOWTO/text/Serial-Programming-HOWTO +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +File format +uint32_t nS number of DAC samples (parameter) +uint32_t adcF ADC base clock frequency (max 42000000?) (parameter) +uint32_t clkDiv divisor of base clock for sample frequency (min 42?) (parameter) +uint32_t nchan (supplied from arduino) +array of nS shorts DAC data +array of nchan*nS shorts ADC data +bool error +string end-timestamp (e.g. 2019-03-02_13:58:11) + +Parameters +string path to file in correct format; nchan, ADC data, error and end-timestamp to be filled in. + +Call directly or from python using os.execl(executable, datafile, port); python use is untested. + */ + +#define MAX_TRANSFER 4096 + +int blocking_write(int fd, char *b, int n) { + int w = 0; + while (w < n) { + ssize_t s = write(fd, &b[w], n-w); + if (s >= 0) w += s; + } +} + +int blocking_read(int fd, char *b, int n) { + int r = 0; + while (r < n) { + ssize_t s = read(fd, &b[r], n-r); + if (s >= 0) r += s; + } +} + + + +int main(int argc, char *argv[]) { + // Open file. + std::string data(argv[1]); + int f = open(data.c_str(), O_RDWR); + std::cout << data << std::endl; + if (f < 0) { + std::cerr << "Error opening file " << data << std::endl; + return(1); + } + + int h[4]; + blocking_read(f, (char*) &h, 16); + int nS = h[0]; + int adcF = h[1]; + int clkDiv = h[2]; + int nchan = h[3]; + std::cout << nS << " " << adcF << " " << clkDiv << " " << nchan << std::endl; + // Open port. + int p = open(argv[2], O_RDWR | O_NOCTTY | O_NDELAY); + if (p == -1) { + std::cerr << "Error opening port " << argv[2] << std::endl; + return(1); + } + // Flush any prior garbage. + tcflush(p, TCIOFLUSH); + // Transmit header information. + blocking_write(p, (char*)&h, 12); + // Read nchan. Already supplied... + blocking_read(p, (char*)&h[3], 4); + // Read data offset. + off_t rdo = 16 + nS * 2; + // Error offset. + off_t eo = rdo + nS*nchan*2; + + char * addr = NULL; + // Error and timestamp add 20 bytes. + addr = (char*)mmap(NULL, eo+20, PROT_READ | PROT_WRITE, MAP_SHARED, f, 0); + // Write nchan to file. + memcpy(addr+12, &h[3], 4); + + // Write and read counters. + int w = 0; + int r = 0; + fd_set readfds, writefds; + int maxfd = p + 1; + while((w < nS*2) || (r < nS*nchan*2)) { + if (w < nS*2) { + FD_SET(p, &writefds); + } else { + FD_ZERO(&writefds); + } + if (r < nS*nchan*2) { + FD_SET(p, &readfds); + } else { + FD_ZERO(&readfds); + } + select(maxfd, &readfds, &writefds, NULL, NULL); + if (w < nS*2) { + ssize_t n = write(p, addr+16+w, std::min(nS*2 - w, MAX_TRANSFER)); + if (n >= 0) w += n; + } + if (r < nS*nchan*2) { + ssize_t n = read (p, addr+rdo+r, std::min(nS*nchan*2 - r, MAX_TRANSFER)); + if (n >= 0) r += n; + } + } + // Write error flag. + bool e; + blocking_read(p, (char*)&e, 1); + if (e) std::cerr << "Arduino error" << std::endl; + std::memcpy(addr+eo, &e, 1); + // Write end timestamp. + time_t rawtime; + struct tm * timeinfo; + char tbuffer [20]; + time (&rawtime); + timeinfo = localtime (&rawtime); + strftime (tbuffer, 20, "%Y-%m-%d_%H:%M:%S", timeinfo); + puts(tbuffer); + std::memcpy(addr+eo+1, tbuffer, 19); + munmap(addr, eo+20); + close(f); + return(0); +} diff --git a/examples/bidirectional_streaming/bidirectional_streaming/DueTC.cpp b/examples/bidirectional_streaming/bidirectional_streaming/DueTC.cpp new file mode 100644 index 00000000..0b617bb6 --- /dev/null +++ b/examples/bidirectional_streaming/bidirectional_streaming/DueTC.cpp @@ -0,0 +1,393 @@ +/* + DueTC.cpp - Local code of Arduino DUE Timer Control defined on DueTC.h + +Created by Olavi Kamppari, October 2013. +Reference to Atmel documentation 11057B-ATARM-28-May-12 +for SAM3X and SAM3A Series + +Programming principles: +- avoid unecessary S/W layers that are just complicating things +- use only one bit manipulation routine (encodeTC_WaveformMode) to calculate a value for Channel Mode Register (TC_CMR) +- use naming conventions specified by Atmel, such as + TCx, where x=indivudal timer number (0-8) + REG_TCi_j, where + i = timer block number (0-2), + j = timer channel number in the block (0-2), + = register name (CCR, CMR, SMMR, CV, RA, RB, RC, SR, IER, IDR, IMR, BCR, BMR, + QIER, QIDR, QIMR. QISR, FMR, WPMR) +- define pin connected timers and interrupts that are not conflicting with system timers and eachothers +- define all available connected timers and interrupts +- verify the results with an oscilloscope + > observe the common false assumption that TIMER_CLOCK5 divider is 1024, when it actually is 3072 + +The main information that is not evident is in routine + + pmc_enable_periph_clk(TCx_IRQn); + +It connects the interrupt ID number (TCx_IRQn) to a predefined interrupt handler (TCx_Handler). The handler code is in this +module and it calls a user defined interrupt handler that is defined in the setup call (setupTC2_Interrupt). + +Released into the public domain. +*/ + +#include "DueTC.h" + +unsigned long encodeTC_WaveformMode( + byte BSWTRG, byte BEEVT, byte BCPC, byte BCPB, + byte ASWTRG, byte AEEVT, byte ACPC, byte ACPA, + byte WAVSEL, byte ENETRG, byte EEVT, byte EEVTEDG, + byte CPCDIS, byte CPCSTOP, byte BURST, byte CLKI, byte TCCLKS) { + // See Atmel doc 11057, section 37.7.11 on page 906 + /* +BSWTRG(30-31), Software Trigger Effect on TIOB + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + BEEVT(28-29), External Event Effect on TIOB + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + BCPC(26-27), RC Compare Effect on TIOB + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + BCPB(24-25), RB Compare Effect on TIOB + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + ASWTRG(22-23), Software Trigger Effect on TIOA + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + AEEVT(20-21), External Event Effect on TIOA + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + ACPC(18-19), RC Compare Effect on TIOA + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + ACPA(16-17), RA Compare Effect on TIOA + 0 NONE None + 1 SET Set + 2 CLEAR Clear + 3 TOGGLE Toggle + + WAVE(15), This is always 1 in waform generation mode + 0: Capture Mode is enabled (Measure signals). + 1: Capture Mode is disabled (Generate wave). + + WAVSEL(13-14) + 0 UP UP mode without automatic trigger on RC Compare + 1 UPDOWN UPDOWN mode without automatic trigger on RC Compare + 2 UP_RC UP mode with automatic trigger on RC Compare + 3 UPDOWN_RC UPDOWN mode with automatic trigger on RC Compare + + ENETRG(12), External Event Trigger Enable + 0: the external event has no effect on the counter and its clock. + In this case, the selected external event only controls the TIOA output. + 1: the external event resets the counter and starts the counter clock + + EEVT(10-11), External Event Selection + 0 TIOB TIOB TIOB is input + 1 XC0 XC0 TIOB is output + 2 XC1 XC1 TIOB is output + 3 XC2 XC2 TIOB is output + + EEVTEDG(8-9), External Event Edge Selection + 0 NONE None + 1 RISING Rising edge + 2 FALLING Falling edge + 3 EDGE Each edge + + CPCDIS(7) + 0: counter clock is not disabled when RB loading occurs. + 1: counter clock is disabled when RB loading occurs + + CPCSTOP(6) + 0: counter clock is not stopped when RB loading occurs. + 1: counter clock is stopped when RB loading occurs + + BURST(4-5) + 0 NONE The clock is not gated by an external signal. + 1 XC0 XC0 is ANDed with the selected clock. + 2 XC1 XC1 is ANDed with the selected clock. + 3 XC2 XC2 is ANDed with the selected clock + + CLKI(3) + 0: counter is incremented on rising edge of the clock. + 1: counter is incremented on falling edge of the clock. + + TCCLKS(0-2) + 0 TIMER_CLOCK1 Clock selected: TCLK1 divider = 2 (1<<1) 42,000 kHz + 1 TIMER_CLOCK2 Clock selected: TCLK2 divider = 8 (1<<3) 10,500 kHz + 2 TIMER_CLOCK3 Clock selected: TCLK3 divider = 32 (1<<5) 2,625 kHz + 3 TIMER_CLOCK4 Clock selected: TCLK4 divider = 128 (1<<7) 656.25 Hz + 4 TIMER_CLOCK5 Clock selected: TCLK5 divider = 3*1024 0.027 Hz + 5 XC0 Clock selected: XC0 + 6 XC1 Clock selected: XC1 + 7 XC2 Clock selected: XC2 + + */ + unsigned long encodedValue; + byte WAVE=1; // Force wave generation + encodedValue=(TCCLKS & 7) + + ((CLKI & 1) << 3) + + ((BURST & 3) << 4) + + ((CPCSTOP & 1) << 6) + + ((CPCDIS & 1) << 7) + + ((EEVTEDG & 3) << 8) + + ((EEVT & 3) << 10) + + ((ENETRG & 1) << 12) + + ((WAVSEL & 3) << 13) + + ((WAVE & 1) << 15) + + ((ACPA &3) << 16) + + ((ACPC &3) << 18) + + ((AEEVT &3) << 20) + + ((ASWTRG &3) << 22) + + ((BCPB &3) << 24) + + ((BCPC &3) << 26) + + ((BEEVT &3) << 28) + + ((BSWTRG &3) << 30); + return (encodedValue); +} + +unsigned long getTC_Waveform(byte tcClock){ + return (encodeTC_WaveformMode( + 0,0,0,0, + 0,0,0,0, + 2,0,0,0, + 0,0,0,0,tcClock)); +} +unsigned long getTC_Waveform_A(byte tcClock){ + return (encodeTC_WaveformMode( + 0,0,0,0, + 0,0,3,0, + 2,0,0,0, + 0,0,0,0,tcClock)); +} + +unsigned long getTC_Waveform_B(byte tcClock){ + return (encodeTC_WaveformMode( + 0,0,3,0, + 0,0,0,0, + 2,0,0,0, + 0,0,0,0,tcClock)); +} + +void start_TC0() { + REG_TC0_CCR0=5; +} +void start_TC1() { + REG_TC0_CCR1=5; +} +void start_TC2() { + REG_TC0_CCR2=5; +} +void start_TC3() { + REG_TC1_CCR0=5; +} +void start_TC4() { + REG_TC1_CCR1=5; +} +void start_TC5() { + REG_TC1_CCR2=5; +} +void start_TC6() { + REG_TC2_CCR0=5; +} +void start_TC7() { + REG_TC2_CCR1=5; +} +void start_TC8() { + REG_TC2_CCR2=5; +} + +void stop_TC0() { + REG_TC0_CCR0=2; +} +void stop_TC1() { + REG_TC0_CCR1=2; +} +void stop_TC2() { + REG_TC0_CCR2=2; +} +void stop_TC3() { + REG_TC1_CCR0=2; +} +void stop_TC4() { + REG_TC1_CCR1=2; +} +void stop_TC5() { + REG_TC1_CCR2=2; +} +void stop_TC6() { + REG_TC2_CCR0=2; +} +void stop_TC7() { + REG_TC2_CCR1=2; +} +void stop_TC8() { + REG_TC2_CCR2=2; +} + + +void setupTC_Pin2_Timing(unsigned int period, byte tcClock){ + analogWrite(2,128); // Start with PWM setting + REG_TC0_CMR0=getTC_Waveform_A(tcClock); // Update Command Register + REG_TC0_RC0=period; // Set period (1 - 0xFFFF) + start_TC0(); // Start timer and generate square wave +} + +void setupTC_Pin5_Timing(unsigned int period, byte tcClock){ + analogWrite(5,128); // Start with PWM setting + REG_TC2_CMR0=getTC_Waveform_A(tcClock); // Update Command Register + REG_TC2_RC0=period; // Set period (1 - 0xFFFF) + start_TC6(); // Start timer and generate square wave +} + +void setupTC_Pin3_Timing(unsigned int period, byte tcClock){ + analogWrite(3,128); // Start with PWM setting + REG_TC2_CMR1=getTC_Waveform_A(tcClock); // Update Command Register + REG_TC2_RC1=period; // Set period (1 - 0xFFFF) + start_TC7(); // Start timer and generate square wave +} + +void setupTC_Pin11_Timing(unsigned int period, byte tcClock){ + analogWrite(11,128); // Start with PWM setting + REG_TC2_CMR2=getTC_Waveform_A(tcClock); // Update Command Register + REG_TC2_RC2=period; // Set period (1 - 0xFFFF) + start_TC8(); // Start timer and generate square wave +} + +void changeTC_Pin2_Period(unsigned int period){ + stop_TC0(); + REG_TC0_RC0=period; + start_TC0(); +} + +void changeTC_Pin5_Period(unsigned int period){ + stop_TC6(); + REG_TC2_RC0=period; + start_TC6(); +} + +void changeTC_Pin3_Period(unsigned int period){ + stop_TC7(); + REG_TC2_RC1=period; + start_TC7(); +} + +void changeTC_Pin11_Period(unsigned int period){ + stop_TC8(); + REG_TC2_RC2=period; + start_TC8(); +} + +void (*TC2_CallBack)(); +void (*TC3_CallBack)(); +void (*TC4_CallBack)(); +void (*TC5_CallBack)(); + +void TC2_Handler() { + TC_GetStatus(TC0, 2); + TC2_CallBack(); +} +void TC3_Handler() { + TC_GetStatus(TC1, 0); + TC3_CallBack(); +} +void TC4_Handler() { + TC_GetStatus(TC1, 1); + TC4_CallBack(); +} +void TC5_Handler() { + TC_GetStatus(TC1, 2); + TC5_CallBack(); +} + +void setupTC2_Interrupt(unsigned int period, byte tcClock, void (*isr)()){ + pmc_set_writeprotect(false); // Enavle direct TC register manipulation + pmc_enable_periph_clk(TC2_IRQn); // Enable predefined interrupt handler + TC2_CallBack=isr; // Route the call to user interrupt handler + REG_TC0_CMR2=getTC_Waveform(tcClock); // Update Command Register + REG_TC0_RC2=period; // Set period (1 - 0xFFFF) + start_TC2(); // Start timer and generate square wave + REG_TC0_IER2= TC_IER_CPCS; // Enable RC compare interrupt + REG_TC0_IDR2=~TC_IER_CPCS; // Disable all other timer interrpts + NVIC_EnableIRQ(TC2_IRQn); // Enable interrupt vector +} + +void setupTC3_Interrupt(unsigned int period, byte tcClock, void (*isr)()){ + pmc_set_writeprotect(false); // Enavle direct TC register manipulation + pmc_enable_periph_clk(TC3_IRQn); // Enable predefined interrupt handler + TC3_CallBack=isr; // Route the call to user interrupt handler + REG_TC1_CMR0=getTC_Waveform(tcClock); // Update Command Register + REG_TC1_RC0=period; // Set period (1 - 0xFFFF) + start_TC3(); // Start timer and generate square wave + REG_TC1_IER0= TC_IER_CPCS; // Enable RC compare interrupt + REG_TC1_IDR0=~TC_IER_CPCS; // Disable all other timer interrpts + NVIC_EnableIRQ(TC3_IRQn); // Enable interrupt vector +} + +void setupTC4_Interrupt(unsigned int period, byte tcClock, void (*isr)()){ + pmc_set_writeprotect(false); // Enavle direct TC register manipulation + pmc_enable_periph_clk(TC4_IRQn); // Enable predefined interrupt handler + TC4_CallBack=isr; // Route the call to user interrupt handler + REG_TC1_CMR1=getTC_Waveform(tcClock); // Update Command Register + REG_TC1_RC1=period; // Set period (1 - 0xFFFF) + start_TC4(); // Start timer and generate square wave + REG_TC1_IER1= TC_IER_CPCS; // Enable RC compare interrupt + REG_TC1_IDR1=~TC_IER_CPCS; // Disable all other timer interrpts + NVIC_EnableIRQ(TC4_IRQn); // Enable interrupt vector +} + +void setupTC5_Interrupt(unsigned int period, byte tcClock, void (*isr)()){ + pmc_set_writeprotect(false); // Enavle direct TC register manipulation + pmc_enable_periph_clk(TC5_IRQn); // Enable predefined interrupt handler + TC5_CallBack=isr; // Route the call to user interrupt handler + REG_TC1_CMR2=getTC_Waveform(tcClock); // Update Command Register + REG_TC1_RC2=period; // Set period (1 - 0xFFFF) + start_TC5(); // Start timer and generate square wave + REG_TC1_IER2= TC_IER_CPCS; // Enable RC compare interrupt + REG_TC1_IDR2=~TC_IER_CPCS; // Disable all other timer interrpts + NVIC_EnableIRQ(TC5_IRQn); // Enable interrupt vector +} + +void changeTC2_Period(unsigned int period) { + stop_TC2(); + REG_TC0_RC2=period; + start_TC2(); +} + +void changeTC3_Period(unsigned int period) { + stop_TC3(); + REG_TC1_RC0=period; + start_TC3(); +} + +void changeTC4_Period(unsigned int period) { + stop_TC4(); + REG_TC1_RC1=period; + start_TC4(); +} + +void changeTC5_Period(unsigned int period) { + stop_TC5(); + REG_TC1_RC2=period; + start_TC5(); +} \ No newline at end of file diff --git a/examples/bidirectional_streaming/bidirectional_streaming/DueTC.h b/examples/bidirectional_streaming/bidirectional_streaming/DueTC.h new file mode 100644 index 00000000..9de88851 --- /dev/null +++ b/examples/bidirectional_streaming/bidirectional_streaming/DueTC.h @@ -0,0 +1,70 @@ +/* + DueTC.h - Interface for Arduino DUE Timer Control implemented in DueTC.cpp + +Created by Olavi Kamppari, October 2013. +Released into the public domain. +*/ + +#ifdef __arm__ + +#ifndef DueTC_h +#define DueTC_h + +#include "Arduino.h" + +// tcClock values for Master Clock (MCK) frequency 84 MHz +// +// Duty cycle with different settings +// +// 0: MCK/2 = 42 MhZ, period 1 = 48 ns, period 0xffff = 3.1 ms, 320 Hz - 21 MHz +// 1: MCK/8 = 10.5 MhZ,period 1 = 190 ns, period 0xffff = 12 ms, 80 Hz - 5.3 MHz +// 2: MCK/32 = 2.6 MhZ, period 1 = 762 ns, period 0xffff = 50 ms, 20 Hz - 1.3 MHz +// 3: MCK/128 = 656 khZ, period 1 = 3.0 us, period 0xffff = 200 ms, 5 Hz - 328 kHz +// 4: MCK/3072 = 28 khZ, period 1 = 72 us, period 0xffff = 4.7 s, 0.2 Hz - 14 kHz +// +// The option 4 is also known as SLCK (slow clock) which is supposed to be at 32 kHz + +// Digital Output Pins for square waves +// ++++++++++++++++++++++++++++++++++++ +// Timer 0 +// TC0 in channel 0: A on PWM2, B on PWM13 +void setupTC_Pin2_Timing(unsigned int period, byte tcClock); +void changeTC_Pin2_Period(unsigned int period); + +// Timer 1, no channels awailable for square waves + +// Timer 2 +// TC 6 in channel 0: A on PWM5, B on PWM4 +// TC 7 in channel 1: A on PWM3, B on PWM10 +// TC 8 in channel 2: A on PWM11, B on PWM12 +void setupTC_Pin5_Timing(unsigned int period, byte tcClock); +void setupTC_Pin3_Timing(unsigned int period, byte tcClock); +void setupTC_Pin11_Timing(unsigned int period, byte tcClock); + +void changeTC_Pin5_Period(unsigned int period); +void changeTC_Pin3_Period(unsigned int period); +void changeTC_Pin11_Period(unsigned int period); + +// Timers for interrupts +// +++++++++++++++++++++ +// +// Timer 1 is used for system services (such as milli, micros, and delay) +// Timers 0, 6, 7, and 8 are reserved for square wave generation +// Timers 2-5 are available for timed interrupts +// +void setupTC2_Interrupt(unsigned int period, byte tcClock, void (*isr)()); +void setupTC3_Interrupt(unsigned int period, byte tcClock, void (*isr)()); +void setupTC4_Interrupt(unsigned int period, byte tcClock, void (*isr)()); +void setupTC5_Interrupt(unsigned int period, byte tcClock, void (*isr)()); + +void changeTC2_Period(unsigned int period); +void changeTC3_Period(unsigned int period); +void changeTC4_Period(unsigned int period); +void changeTC5_Period(unsigned int period); + +#endif + +#else +#error DueTC works only on Arduino DUE ARM based processor +#endif + diff --git a/examples/bidirectional_streaming/bidirectional_streaming.ino b/examples/bidirectional_streaming/bidirectional_streaming/bidirectional_streaming.ino similarity index 97% rename from examples/bidirectional_streaming/bidirectional_streaming.ino rename to examples/bidirectional_streaming/bidirectional_streaming/bidirectional_streaming.ino index 6fdf3412..45307902 100644 --- a/examples/bidirectional_streaming/bidirectional_streaming.ino +++ b/examples/bidirectional_streaming/bidirectional_streaming/bidirectional_streaming.ino @@ -1,4 +1,4 @@ -#include +#include "DueTC.h" // Requires modified USB files: CDC.cpp, USBCore.cpp, USBAPI.h. // These are found in the "board support package". On linux they usually @@ -56,14 +56,16 @@ uint32_t txc; bool errorFlag = 0; void blockingRead(uint8_t * b, uint32_t n) { - while (n) { - n -= SerialUSB.read(b, n); + int tr = n; + while (tr) { + tr -= SerialUSB.read(b + n - tr, tr); } } void blockingWrite(uint8_t * b, uint32_t n) { - while (n) { - n -= SerialUSB.write(b, n); + int tw = n; + while (tw) { + tw -= SerialUSB.write(b + n - tw, tw); } } diff --git a/examples/bidirectional_streaming/display.py b/examples/bidirectional_streaming/display.py new file mode 100644 index 00000000..5b7f9b9d --- /dev/null +++ b/examples/bidirectional_streaming/display.py @@ -0,0 +1,25 @@ +import struct +import numpy as np +import matplotlib.pyplot as mp + +with open('test.dat', 'r') as f: + r = f.read() +h = struct.unpack("<4I", r[0:16]) +n = h[0] +tx = np.array(struct.unpack("<%ih"%n, r[16:16+n*2])) +rx = np.array(struct.unpack("<%ih"%(2*n), r[16+n*2:16+n*3*2])) +e = ord(struct.unpack("c", r[16+n*3*2])[0]) +s = r[16+n*3*2+1:16+n*3*2+20] + +rx1 = rx[np.arange(0, 2*n-1, 2)] +rx2 = rx[np.arange(1, 2*n, 2)] + +print "header", h +print "error", e +print "timestamp", s + +mp.plot(tx) +mp.plot(rx1) +mp.plot(rx2) + +mp.show() diff --git a/examples/bidirectional_streaming/genfile.py b/examples/bidirectional_streaming/genfile.py new file mode 100644 index 00000000..ddbb3534 --- /dev/null +++ b/examples/bidirectional_streaming/genfile.py @@ -0,0 +1,15 @@ + +import numpy as np +import struct + +nS = 10000000 +d = np.random.randint(0, 2**12, nS) +adcF = 42000000 +clkDiv = 42 +nchan = 2 +txd = struct.pack("{}h".format(len(d)), *d) +h = struct.pack("<4I", nS, adcF, clkDiv, nchan) +rxdet = struct.pack("{}h".format(nS*2+10), *np.zeros(nS*2+10)) +s = "" + h + txd + rxdet +with open("test.dat", 'wb') as f: + f.write(s) diff --git a/examples/bidirectional_streaming_py/bidirectional_streaming.py b/examples/bidirectional_streaming_py/bidirectional_streaming.py deleted file mode 100644 index 38ce9017..00000000 --- a/examples/bidirectional_streaming_py/bidirectional_streaming.py +++ /dev/null @@ -1,99 +0,0 @@ - -import numpy as np -import serial -import struct -import time -import glob -import hashlib - -currentport = glob.glob('/dev/ttyACM*')[0] -print "Port: ", currentport - -port = serial.Serial(currentport, baudrate=0, parity = serial.PARITY_NONE, stopbits = serial.STOPBITS_ONE, bytesize = serial.EIGHTBITS, timeout=0) - -savepath = '/users/nsr/barbour/junk/' - -# Just for testing purposes. -testdata = np.ones(1000000) -for i in range(len(testdata)): - testdata[i] = i%4096 - -def blockingWrite(data): - n = len(data) - while n: - n -= port.write(data) - -def blockingRead(n): - r = '' - while n: - s = port.read(n) - r += s - n -= len(s) - return r - -class sound: - def __init__(self, data=testdata, clkDiv=84, adcF=21000000): - self.nS = len(data) - self.clkDiv = clkDiv - self.adcF = adcF - self.txd = struct.pack("<%sH"%(len(data)), *data) - # These are to regulate flow through the port. It seems it can - # block if too many data are written while reading. - self.maxOut = 65536 - self.maxIn = self.maxOut - self.maxAdvance = 65536 - self.hash = hashlib.md5(self.txd).hexdigest() - print "Sound has hash: ", self.hash - def episode(self): - # Number of bytes to transmit. - nb = len(self.txd) - tstamp = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()) - print tstamp - print self.hash - h = struct.pack("<3I", self.nS, self.adcF, self.clkDiv) - blockingWrite(h) - # DAC and ADC... - fname = "DnA_"+tstamp+".dat" - # Leave as string for immediate writing. - nchan = blockingRead(4) - preamble = fname + h + nchan - # Later we need the number. - nchan = struct.unpack(" Date: Tue, 5 Mar 2019 00:08:46 +0100 Subject: [PATCH 5/8] Improve formatting of readme.md. --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 2384b3e3..6e8e371f 100644 --- a/README.md +++ b/README.md @@ -55,18 +55,30 @@ A file is generated by the python script that contains a few control parameters Connect DAC0 to A0 and GND to A1 (for instance). $ python genfile.py + $ time ./bidi test.dat /dev/ttyACM0 + test.dat + 10000000 42000000 42 2 + 2019-03-04_01:05:30 + real 0m20.111s + user 0m0.020s + sys 0m1.088s + $python display.py + header (10000000, 42000000, 42, 2) + error 0 + timestamp 2019-03-04_01:05:30 + If you find the streaming to be unreliable (an error is raised), there is probably a bottleneck somewhere. Things to try: From 23534531e735650e32a30065ec9911f7f2e074bd Mon Sep 17 00:00:00 2001 From: Boris BARBOUR Date: Tue, 5 Mar 2019 00:11:30 +0100 Subject: [PATCH 6/8] Further formatting of readme.md... --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 6e8e371f..8c516d1e 100644 --- a/README.md +++ b/README.md @@ -35,13 +35,20 @@ Speed test This a simple speed test. The Arduino sketch just reads available data on the native USB serial port using the new block read member and sends it back, in a loop. On the host computer, a large array is written to and read back from the serial tty by a short C++ program making use of the "select" call for efficient sequencing of the i/o operations. The port is specified as a parameter ("0" in the example). $ g++ -O3 -o speed_test speed_test.cpp + $ time ./speed_test 0 + Test round-trip streaming with 100000000 bytes. + /dev/ttyACM0 + Arrays equal! + real 0m37.852s + user 0m0.288s + sys 0m1.576s 100 Mb in ~40s is 2.5 Mb/s. From ce69b12640654ffef3deffa14be0f480dc629355 Mon Sep 17 00:00:00 2001 From: Boris Barbour Date: Tue, 5 Mar 2019 12:38:09 +0100 Subject: [PATCH 7/8] Tweaked examples text. --- README.md | 17 +++++++++-------- examples/bidirectional_streaming/bidi.cpp | 2 +- examples/bidirectional_streaming/genfile.py | 5 +++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 8c516d1e..d388d893 100644 --- a/README.md +++ b/README.md @@ -63,24 +63,25 @@ Connect DAC0 to A0 and GND to A1 (for instance). $ python genfile.py +$ g++ -O3 -o bidi bidi.cpp + $ time ./bidi test.dat /dev/ttyACM0 test.dat -10000000 42000000 42 2 - -2019-03-04_01:05:30 +1000000 42000000 42 2 +2019-03-05_12:26:33 -real 0m20.111s +real 0m2.011s -user 0m0.020s +user 0m0.144s -sys 0m1.088s +sys 0m1.802s -$python display.py +$ python display.py -header (10000000, 42000000, 42, 2) +header (1000000, 42000000, 42, 2) error 0 diff --git a/examples/bidirectional_streaming/bidi.cpp b/examples/bidirectional_streaming/bidi.cpp index f0a4c1d0..2ed73091 100644 --- a/examples/bidirectional_streaming/bidi.cpp +++ b/examples/bidirectional_streaming/bidi.cpp @@ -89,7 +89,7 @@ int main(int argc, char *argv[]) { // Error and timestamp add 20 bytes. addr = (char*)mmap(NULL, eo+20, PROT_READ | PROT_WRITE, MAP_SHARED, f, 0); // Write nchan to file. - memcpy(addr+12, &h[3], 4); + std::memcpy(addr+12, &h[3], 4); // Write and read counters. int w = 0; diff --git a/examples/bidirectional_streaming/genfile.py b/examples/bidirectional_streaming/genfile.py index ddbb3534..29f5c382 100644 --- a/examples/bidirectional_streaming/genfile.py +++ b/examples/bidirectional_streaming/genfile.py @@ -2,8 +2,9 @@ import numpy as np import struct -nS = 10000000 -d = np.random.randint(0, 2**12, nS) +nS = 1000000 +d = np.arange(nS)%4096 +#d = np.random.randint(0, 2**12, nS) adcF = 42000000 clkDiv = 42 nchan = 2 From fe19912d3a0addac24fc6b250443fd800d1a74e5 Mon Sep 17 00:00:00 2001 From: Boris Barbour Date: Tue, 5 Mar 2019 12:49:14 +0100 Subject: [PATCH 8/8] Text tweaks to readme. --- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d388d893..d028cb7e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ +Optimised host <-> Arduino Due streaming data transfer over native USB port +============================================================================ + Streaming data over the native USB to the Arduino Due was disappointingly slow, about 60kb/s, yet transmission from Arduino to host computer could run at Mb/s, suggesting that the USB connection was not limiting. Inspection of the code showed that data was received byte by byte through a chain of functions with an overhead of massively redundant checks. -This repository is a fork of the official code and contains optimisations of the USB code. With them, it is possible to perform round-trip streaming of data to and from the Arduino Due at 1.8Mb/s at least and probably much faster (the Arduino code was not limiting in these tests driven by a simple python script on the host computer). +This repository is a fork of the official code and contains optimisations of the USB code. With them, it was possible to perform round-trip streaming of data to and from the Arduino Due at 2.5Mb/s. Only 3 files have been changed. If you don't wish to deal with the whole git repository, the files CDC.cpp, USBAPI.h and USBCore.cpp in the ArduinoCore-sam/cores/arduino/USB/ directory can be transferred into your local board package, which under my linux is found here: ~/.arduino15/packages/arduino/hardware/sam//cores/arduino/USB/ @@ -10,7 +13,7 @@ A pull request has been submitted but not acted upon. Changes to the arduino libraries ================================ -A (non-blocking) overloaded read function that accepts as parameters a buffer and size is now provided as a member of the SerialUSB class. If neither of the read functions is used, for instance during a DMA application, the user may need to call "SerialUSB.accept()" periodically if there is a danger that the buffer will sometimes be too full to accept a full FIFO (512 bytes) of data upon interrupt, as this will cause reception to block. The CDC_SERIAL_BUFFER_SIZE can be increased from the original 512 bytes to reduce this risk. +A (non-blocking) overloaded read function that accepts as parameters a buffer and size is now provided as a member of the SerialUSB class. If neither of the read functions is used, for instance during a DMA application, the user may need to call "SerialUSB.accept()" periodically if there is a danger that the buffer will sometimes be too full to accept a full FIFO (512 bytes) of data upon interrupt, as this will cause reception to block. The CDC_SERIAL_BUFFER_SIZE can be increased (in the library code) from the original 512 bytes to reduce this risk. The SerialUSB (Serial_) class has been modified to remove all mention of the RingBuffer used elsewhere in the Arduino code but NOT here. This was confusing at best. @@ -32,7 +35,7 @@ Examples Speed test ---------- -This a simple speed test. The Arduino sketch just reads available data on the native USB serial port using the new block read member and sends it back, in a loop. On the host computer, a large array is written to and read back from the serial tty by a short C++ program making use of the "select" call for efficient sequencing of the i/o operations. The port is specified as a parameter ("0" in the example). +This is a simple speed test. The Arduino sketch just reads available data on the native USB serial port using the new block read member and sends it back, in a loop. On the host computer, a large array is written to and read back from the serial tty by a short C++ program making use of the "select" call for efficient sequencing of the i/o operations. The port is specified as a parameter ("0" in the example). $ g++ -O3 -o speed_test speed_test.cpp @@ -53,8 +56,8 @@ sys 0m1.576s 100 Mb in ~40s is 2.5 Mb/s. -Bidirectional streaming ------------------------ +Bidirectional streaming with DAC and ADC DMA +-------------------------------------------- This example is much more involved, but reflects the motivation for this project. An array is streamed from the host computer to the arduino, where it is transferred to the DAC by DMA. At the same time, two ADC channels are acquired at the same total frequency and streamed back to the host. Here the speed is limited by the maximum ADC rate of 1 MHz, corresponding to an arduino -> host data rate of 2 Mb/s (with 1 Mb/s flowing in the opposite direction). A timer library is included from https://github.com/OliviliK/DueTC A file is generated by the python script that contains a few control parameters in the header and data for the DAC, as well as space for the ADC data to be acquired, an error flag and a timestamp. The file is memory-mapped to enable simultaneous i/o. The path to the data file and the tty port are given as parameters.