// ESP32 Voice Assistant Arduino Sketch
#include <Arduino.h>
#include <WiFi.h>
#include <SPIFFS.h>
#include <HTTPClient.h>
#include <driver/i2s.h>
// ** Wi-Fi Credentials (replace with your network SSID/Password) **
const char* WIFI_SSID = "YourSSID";
const char* WIFI_PASSWORD = "YourPassword";

// ** Server Configuration (replace with your server address/endpoint) **
// serverHost is the base URL (scheme, host and port); the endpoint paths
// below are appended to it to build each request URL.
const char* serverHost = "http://192.168.1.180:8888";  // Example server address
const char* uploadEndpoint = "/uploadAudio";            // Endpoint the recorded WAV is POSTed to
const char* triggerEndpoint = "/playbackReady";         // Endpoint to poll for playback trigger
const char* audioEndpoint = "/audio";                   // Endpoint to fetch audio stream
// ** Audio and I2S Settings **
#define I2S_SAMPLE_RATE     16000  // Audio sample rate in Hz
#define I2S_SAMPLE_BITS     16     // Bits per audio sample (INMP441 supports 24-bit, using 16-bit here)
#define I2S_CHANNEL_NUM     1      // Number of channels (mono)
#define RECORD_DURATION_SEC 5      // Recording duration in seconds
#define I2S_READ_CHUNK      1024   // I2S read chunk size in bytes (buffer size per read)
#define VOLUME_GAIN         2      // Volume gain (simple amplification factor for recorded audio)
// I2S pins for INMP441 microphone (I2S input) - adjust these to your wiring
const int PIN_I2S_MIC_SD = 32;   // I2S data in (from INMP441 DOUT)
// NOTE(review): the original note names the INMP441 "L/R" pin for word select;
// breakout boards usually label the word-select pin "WS" - confirm the wiring.
const int PIN_I2S_MIC_WS = 15;   // I2S word select / LRCL (from INMP441 L/R)
const int PIN_I2S_MIC_SCK = 14;  // I2S clock (from INMP441 CLK)

// I2S pins for MAX98357A speaker (I2S output) - adjust these to your wiring
const int PIN_I2S_SPK_SD = 25;   // I2S data out (to MAX98357A DIN)
const int PIN_I2S_SPK_WS = 27;   // I2S word select / LRCL (to MAX98357A LRC)
const int PIN_I2S_SPK_SCK = 26;  // I2S clock (to MAX98357A BCLK)

// Wake-up and status LED pins
const int PIN_WAKE_BUTTON = 33;  // GPIO for wake button (must be RTC-capable for deep sleep wake)
const int PIN_STATUS_LED = 2;    // GPIO for status LED (on-board LED on many ESP32 boards)
// Global flags and handles
volatile bool uploadDone = false; // Set true when audio upload is
complete
volatile bool playbackTriggered = false; // Set true when server signals
audio ready
SemaphoreHandle_t
playSemaphore; // Binary semaphore to trigger audio playback task
// Function prototypes

// -- File system helpers --
void SPIFFSInit();
void listSPIFFS();
void printSpaceInfo();

// -- I2S peripheral setup --
void i2sInitINMP441();
void i2sInitMax98357A();

// -- Recording helpers --
void I2SAudioRecord_dataScale(int16_t *buffer, size_t count);
void wavHeader(uint8_t* header, uint32_t dataSize, uint32_t sampleRate, uint16_t channels, uint16_t bitsPerSample);
void uploadFile();

// -- FreeRTOS task entry points (each takes the unused task parameter) --
void wifiConnect(void *pvParameters);
void I2SAudioRecord(void *pvParameters);
void semaphoreWait(void *pvParameters);
void broadcastAudio(void *pvParameters);
void setup() {
Serial.begin(115200);
pinMode(PIN_STATUS_LED, OUTPUT);
digitalWrite(PIN_STATUS_LED, LOW); // LED off initially
pinMode(PIN_WAKE_BUTTON, INPUT_PULLUP);
// Determine wake-up reason and handle deep sleep logic
if (esp_sleep_get_wakeup_cause() != ESP_SLEEP_WAKEUP_EXT0) {
// Not woken by external GPIO (either fresh power-on or other wake)
if (digitalRead(PIN_WAKE_BUTTON) == HIGH) {
// Button not pressed on startup -> enter deep sleep until button is
pressed
Serial.println("No wake button press detected. Entering deep
sleep...");
// Configure EXT0 wakeup on GPIO33 (button press, active LOW)
esp_sleep_enable_ext0_wakeup(GPIO_NUM_33, 0);
esp_deep_sleep_start();
}
// If button is pressed at startup, continue to run (do not sleep)
} else {
// Woken up from deep sleep by button press (EXT0)
Serial.println("Woke up from deep sleep by button press.");
}
2
// Initialize file system and check available space
SPIFFSInit();
printSpaceInfo();
listSPIFFS();
// Initialize I2S for microphone input
i2sInitINMP441();
// Create binary semaphore for playback trigger
playSemaphore = xSemaphoreCreateBinary();
if (playSemaphore == NULL) {
Serial.println("Error creating semaphore!");
}
// Create FreeRTOS tasks (pinned to specific cores)
xTaskCreatePinnedToCore(wifiConnect, "WiFi_Connect", 4096, NULL, 1, NULL,
0); // WiFi connection task on core 0
xTaskCreatePinnedToCore(I2SAudioRecord, "I2S_Record", 8192, NULL, 2, NULL,
1); // Audio recording task on core 1
xTaskCreatePinnedToCore(broadcastAudio, "Audio_Play", 8192, NULL, 2, NULL,
1); // Audio playback task on core 1 (waits on semaphore)
// Note: semaphoreWait task will be created after upload is done (from
I2SAudioRecord task)
}
void loop() {
// Nothing to do in main loop, tasks handle all functionality.
// Sleep to reduce power consumption while waiting.
delay(1000);
}
/* ---------- SPIFFSInit: Initialize and mount SPIFFS ---------- */
void SPIFFSInit() {
  // Mount without formatting first, so the "formatting" message below is only
  // printed when a format is actually about to be attempted.
  if (!SPIFFS.begin(false)) {
    Serial.println("SPIFFS Mount Failed, formatting...");
    // true => format on fail, then mount again.
    // If formatting also fails, halt execution
    if (!SPIFFS.begin(true)) {
      Serial.println("SPIFFS initial format failed!");
      while (1) { delay(100); }
    }
  }
  Serial.println("SPIFFS mounted successfully.");
}
/* ---------- listSPIFFS: List files in SPIFFS ---------- */
void listSPIFFS() {
  Serial.println("SPIFFS contents:");
  File root = SPIFFS.open("/");
  if (!root) {
    Serial.println("Failed to open SPIFFS root directory");
    return;
  }

  File file = root.openNextFile();
  if (!file) {
    Serial.println(" (No files found)");
  }
  while (file) {
    // size() returns size_t; cast so the argument matches %u exactly
    Serial.printf(" %s - %u bytes\n", file.name(), (unsigned)file.size());
    file.close();  // release the entry before moving to the next one
    file = root.openNextFile();
  }
  root.close();
}
/* ---------- printSpaceInfo: Show SPIFFS total/used space ---------- */
void printSpaceInfo() {
  size_t total = SPIFFS.totalBytes();
  size_t used = SPIFFS.usedBytes();
  // Guard the subtraction so an inconsistent report cannot wrap around
  size_t available = (used <= total) ? (total - used) : 0;
  // size_t is unsigned: print with %u and explicit casts
  Serial.printf("SPIFFS Total: %u bytes, Used: %u bytes, Free: %u bytes\n",
                (unsigned)total, (unsigned)used, (unsigned)available);
}
/* ---------- i2sInitINMP441: Configure I2S for INMP441 mic (I2S ADC mode)
---------- */
void i2sInitINMP441() {
// I2S driver configuration for recording (microphone input)
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER |
I2S_MODE_RX), // Master mode, RX only
.sample_rate = I2S_SAMPLE_RATE,
.bits_per_sample = (i2s_bits_per_sample_t)I2S_SAMPLE_BITS,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, // Only one
channel (left)
.communication_format = I2S_COMM_FORMAT_STAND_I2S,
.intr_alloc_flags =
ESP_INTR_FLAG_LEVEL1, // Interrupt level 1
.dma_buf_count = 8,
.dma_buf_len = 1024, // DMA buffer
length (bytes)
.use_apll = false,
.tx_desc_auto_clear = false,
.fixed_mclk = 0
};
// I2S pin configuration for INMP441
i2s_pin_config_t pin_config = {
.bck_io_num = PIN_I2S_MIC_SCK,
.ws_io_num = PIN_I2S_MIC_WS,
.data_out_num = -1, // Not used (no TX in this
configuration)
.data_in_num = PIN_I2S_MIC_SD
};
// Install and start I2S driver
4
esp_err_t err = i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
if (err != ESP_OK) {
Serial.printf("Failed to install I2S driver for INMP441: %d\n", err);
}
err = i2s_set_pin(I2S_NUM_0, &pin_config);
if (err != ESP_OK) {
Serial.printf("Failed to set I2S pins for INMP441: %d\n", err);
}
// Clear I2S RX buffer
i2s_zero_dma_buffer(I2S_NUM_0);
Serial.println("I2S microphone (INMP441) initialized.");
}
/* ---------- i2sInitMax98357A: Configure I2S port 1 as master/TX for the MAX98357A amplifier ---------- */
void i2sInitMax98357A() {
  // I2S driver configuration for playback (speaker output)
  i2s_config_t i2s_config = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),  // Master mode, TX only
    .sample_rate = I2S_SAMPLE_RATE,
    .bits_per_sample = (i2s_bits_per_sample_t)I2S_SAMPLE_BITS,
    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,          // mono channel (left)
    .communication_format = I2S_COMM_FORMAT_STAND_I2S,
    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
    .dma_buf_count = 8,
    .dma_buf_len = 1024,                                  // DMA buffer length
    .use_apll = false,
    .tx_desc_auto_clear = true,                           // Auto clear TX buffer on underflow
    .fixed_mclk = 0
  };

  // I2S pin configuration for MAX98357A
  i2s_pin_config_t pin_config = {
    .bck_io_num = PIN_I2S_SPK_SCK,
    .ws_io_num = PIN_I2S_SPK_WS,
    .data_out_num = PIN_I2S_SPK_SD,
    .data_in_num = -1  // Not used (no RX in this configuration)
  };

  // Install and start I2S driver for speaker
  esp_err_t err = i2s_driver_install(I2S_NUM_1, &i2s_config, 0, NULL);
  if (err != ESP_OK) {
    Serial.printf("Failed to install I2S driver for MAX98357A: %d\n", err);
    return;  // nothing below can work without an installed driver
  }
  err = i2s_set_pin(I2S_NUM_1, &pin_config);
  if (err != ESP_OK) {
    Serial.printf("Failed to set I2S pins for MAX98357A: %d\n", err);
    return;  // do not report a successful init
  }

  // Clear I2S TX buffer
  i2s_zero_dma_buffer(I2S_NUM_1);
  Serial.println("I2S speaker (MAX98357A) initialized.");
}
/* ---------- wifiConnect: Task to connect to Wi-Fi network ---------- */
void wifiConnect(void *pvParameters) {
Serial.printf("Connecting to WiFi SSID: %s\n", WIFI_SSID);
WiFi.mode(WIFI_STA);
WiFi.begin(WIFI_SSID, WIFI_PASSWORD);
// Attempt to connect until successful
while (WiFi.status() != WL_CONNECTED) {
Serial.print(".");
vTaskDelay(pdMS_TO_TICKS(500));
}
Serial.printf("\nWiFi connected, IP address: %s\n",
WiFi.localIP().toString().c_str());
// WiFi connected, turn on LED briefly to indicate success (if desired)
digitalWrite(PIN_STATUS_LED, HIGH);
vTaskDelay(pdMS_TO_TICKS(200));
digitalWrite(PIN_STATUS_LED, LOW);
// Delete this task as it's no longer needed
vTaskDelete(NULL);
}
/* ---------- I2SAudioRecord: Task to record audio from I2S mic and upload to
server ---------- */
void I2SAudioRecord(void *pvParameters) {
// Buffer for I2S reads
int16_t *i2sBuffer = (int16_t*) malloc(I2S_READ_CHUNK);
if (!i2sBuffer) {
Serial.println("Failed to allocate I2S read buffer");
vTaskDelete(NULL);
}
// Remove any existing audio file
const char* filename = "/audio.wav";
if (SPIFFS.exists(filename)) {
SPIFFS.remove(filename);
}
// Open file for recording
File audioFile = SPIFFS.open(filename, FILE_WRITE);
if (!audioFile) {
Serial.println("Failed to open file for writing");
free(i2sBuffer);
vTaskDelete(NULL);
}
// Prepare WAV header with placeholder values
uint8_t header[44];
wavHeader(header, 0, I2S_SAMPLE_RATE, I2S_CHANNEL_NUM, I2S_SAMPLE_BITS);
audioFile.write(header, 44); // Reserve space for WAV header
6
// Start recording
Serial.println("Starting audio recording...");
digitalWrite(PIN_STATUS_LED, HIGH); // Turn on LED to indicate recording
size_t totalBytesWritten = 0;
size_t bytesRead = 0;
unsigned long startMillis = millis();
while (millis() - startMillis < RECORD_DURATION_SEC * 1000) {
// Read from I2S (blocking until I2S_READ_CHUNK bytes are read)
if (i2s_read(I2S_NUM_0, i2sBuffer, I2S_READ_CHUNK, &bytesRead,
pdMS_TO_TICKS(100)) == ESP_OK) {
if (bytesRead > 0) {
// Optionally scale/adjust the audio data (amplify)
I2SAudioRecord_dataScale(i2sBuffer, bytesRead / sizeof(int16_t));
// Write raw audio data to file
audioFile.write((const uint8_t*) i2sBuffer, bytesRead);
totalBytesWritten += bytesRead;
}
}
}
// Stop recording
digitalWrite(PIN_STATUS_LED, LOW); // Turn off recording LED
audioFile.close();
Serial.printf("Recording finished, %d bytes written to %s\n",
totalBytesWritten, filename);
// Update the WAV header with correct sizes now that recording is done
File f = SPIFFS.open(filename, FILE_WRITE);
wavHeader(header, totalBytesWritten, I2S_SAMPLE_RATE, I2S_CHANNEL_NUM,
I2S_SAMPLE_BITS);
if (f) {
f.seek(0);
f.write(header, 44);
f.close();
}
// Show SPIFFS usage after recording
printSpaceInfo();
// Upload the recorded file to the server
uploadFile();
// Mark upload done (for other tasks)
uploadDone = true;
// Create a task to wait for playback trigger from server
xTaskCreatePinnedToCore(semaphoreWait, "Trigger_Wait", 4096, NULL, 1,
NULL, 0);
// Delete this task (recording is done)
free(i2sBuffer);
7
vTaskDelete(NULL);
}
/* ---------- I2SAudioRecord_dataScale: Amplify 16-bit samples in place with saturation ---------- */
/*
 * buffer: samples to scale (modified in place)
 * count:  number of int16_t samples in buffer
 */
void I2SAudioRecord_dataScale(int16_t *buffer, size_t count) {
  int16_t *cursor = buffer;
  size_t remaining = count;
  while (remaining-- > 0) {
    // Multiply by VOLUME_GAIN in a wider type, then saturate to int16_t
    int32_t amplified = *cursor * VOLUME_GAIN;
    if (amplified > 32767) {
      amplified = 32767;
    }
    if (amplified < -32768) {
      amplified = -32768;
    }
    *cursor = (int16_t) amplified;
    ++cursor;
  }
}
/* ---------- wavHeader: Generate a 44-byte WAV file header ---------- */

/* Store a 16-bit value at dst in little-endian byte order. */
static void wavPutLE16(uint8_t *dst, uint16_t value) {
  dst[0] = (uint8_t)(value & 0xFF);
  dst[1] = (uint8_t)((value >> 8) & 0xFF);
}

/* Store a 32-bit value at dst in little-endian byte order. */
static void wavPutLE32(uint8_t *dst, uint32_t value) {
  dst[0] = (uint8_t)(value & 0xFF);
  dst[1] = (uint8_t)((value >> 8) & 0xFF);
  dst[2] = (uint8_t)((value >> 16) & 0xFF);
  dst[3] = (uint8_t)((value >> 24) & 0xFF);
}

/* Copy a four-character chunk tag (no terminator) to dst. */
static void wavPutTag(uint8_t *dst, const char *tag) {
  for (int i = 0; i < 4; ++i) {
    dst[i] = (uint8_t)tag[i];
  }
}

/*
 * Fill `header` (at least 44 bytes) with a canonical PCM WAV header.
 *
 * header:        destination buffer; nothing is written when it is NULL
 * dataSize:      size of the audio payload in bytes
 * sampleRate:    samples per second
 * channels:      number of interleaved channels
 * bitsPerSample: bits per sample; bytes per sample is rounded up
 */
void wavHeader(uint8_t* header, uint32_t dataSize, uint32_t sampleRate,
               uint16_t channels, uint16_t bitsPerSample) {
  if (!header) {
    return;
  }

  const uint16_t bytesPerSample = (uint16_t)((bitsPerSample + 7) / 8);
  const uint16_t blockAlign = (uint16_t)(channels * bytesPerSample);
  const uint32_t byteRate = sampleRate * blockAlign;

  // RIFF chunk descriptor; chunk size = 36 + data size
  wavPutTag(header + 0, "RIFF");
  wavPutLE32(header + 4, dataSize + 36);
  wavPutTag(header + 8, "WAVE");

  // "fmt " sub-chunk: 16 bytes, audio format 1 = PCM
  wavPutTag(header + 12, "fmt ");
  wavPutLE32(header + 16, 16);
  wavPutLE16(header + 20, 1);
  wavPutLE16(header + 22, channels);
  wavPutLE32(header + 24, sampleRate);
  wavPutLE32(header + 28, byteRate);
  wavPutLE16(header + 32, blockAlign);
  wavPutLE16(header + 34, bitsPerSample);

  // "data" sub-chunk header
  wavPutTag(header + 36, "data");
  wavPutLE32(header + 40, dataSize);
}
/* ---------- uploadFile: Upload the recorded WAV file to the Node.js server
---------- */
void uploadFile() {
const char* filename = "/audio.wav";
File file = SPIFFS.open(filename, FILE_READ);
if (!file) {
Serial.println("Error: Recorded file not found for upload!");
return;
}
if (WiFi.status() != WL_CONNECTED) {
Serial.println("WiFi not connected, skipping file upload.");
file.close();
return;
}
uint32_t fileSize = file.size();
Serial.printf("Uploading %s to server (%lu bytes)...\n", filename,
fileSize);
HTTPClient http;
String url = String(serverHost) + uploadEndpoint;
http.begin(url);
http.addHeader("Content-Type", "audio/wav");
http.addHeader("Content-Length", String(fileSize));
int httpResponseCode = http.sendRequest("POST", &file, fileSize);
file.close();
if (httpResponseCode == 200) {
String response = http.getString();
Serial.println("Server transcription response:");
Serial.println(response);
} else {
Serial.printf("File upload failed, HTTP response code: %d\n",
httpResponseCode);
}
http.end();
}
9
/* ---------- semaphoreWait: Task to poll server for playback trigger, then
signal audio playback ---------- */
void semaphoreWait(void *pvParameters) {
Serial.println("Polling server for playback trigger...");
bool triggered = false;
HTTPClient http;
String url = String(serverHost) + triggerEndpoint;
while (!triggered) {
if (WiFi.status() != WL_CONNECTED) {
Serial.println("WiFi disconnected while waiting for trigger.");
break;
}
http.begin(url);
int httpCode = http.GET();
if (httpCode == 200) {
String resp = http.getString();
// Expect server to respond with "READY" or "1" when audio is prepared
if (resp.indexOf("READY") != -1 || resp.indexOf("1") != -1) {
Serial.println("Playback trigger received from server!");
triggered = true;
playbackTriggered = true;
// Signal the playback task to start (release semaphore)
xSemaphoreGive(playSemaphore);
}
} else {
Serial.printf("Trigger poll HTTP code: %d\n", httpCode);
}
http.end();
if (!triggered) {
// Blink LED while waiting for trigger
digitalWrite(PIN_STATUS_LED, !digitalRead(PIN_STATUS_LED));
vTaskDelay(pdMS_TO_TICKS(1000)); // Wait 1 second before polling again
}
}
// Ensure LED is off once triggered or if exiting
digitalWrite(PIN_STATUS_LED, LOW);
// Delete this task after signaling
vTaskDelete(NULL);
}
/* ---------- broadcastAudio: Task to stream audio from server and play via
I2S DAC ---------- */
void broadcastAudio(void *pvParameters) {
// Wait for semaphore indicating audio is ready to play
xSemaphoreTake(playSemaphore, portMAX_DELAY);
// Initialize I2S for speaker output
i2sInitMax98357A();
Serial.println("Starting audio playback...");
// Connect to server to download/stream the audio data
10
HTTPClient http;
String url = String(serverHost) + audioEndpoint;
http.begin(url);
int httpCode = http.GET();
if (httpCode == 200) {
WiFiClient *stream = http.getStreamPtr();
// Read data in chunks and write to I2S
uint8_t buff[512];
while (http.connected()) {
size_t len = stream->available();
if (len == 0) {
// No data yet, yield to other tasks
vTaskDelay(pdMS_TO_TICKS(10));
continue;
}
if (len > sizeof(buff)) len = sizeof(buff);
int bytesRead = stream->readBytes(buff, len);
if (bytesRead > 0) {
size_t bytes_written = 0;
i2s_write(I2S_NUM_1, buff, bytesRead, &bytes_written, portMAX_DELAY);
}
// Break if end of stream
if (bytesRead <= 0) {
break;
}
}
Serial.println("Audio playback completed.");
} else {
Serial.printf("Failed to GET audio stream, HTTP code: %d\n", httpCode);
}
http.end();
// Optional: uninitialize I2S to save power (not strictly needed before
deep sleep)
i2s_driver_uninstall(I2S_NUM_1);
// Enter deep sleep after playback (setup wake on button for next use)
Serial.println("Entering deep sleep, waiting for next button press to
wake...");
// Configure wakeup on button press again
esp_sleep_enable_ext0_wakeup(GPIO_NUM_33, 0);
esp_deep_sleep_start();
// (No need to vTaskDelete here because deep sleep stops execution)
vTaskDelete(NULL);
}
11