diff --git a/.devops/main-cuda.Dockerfile b/.devops/main-cuda.Dockerfile index 75a395c70f2..b9f4873937b 100644 --- a/.devops/main-cuda.Dockerfile +++ b/.devops/main-cuda.Dockerfile @@ -13,11 +13,10 @@ WORKDIR /app ARG CUDA_DOCKER_ARCH=all # Set nvcc architecture ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -# Enable cuBLAS -ENV GGML_CUDA=1 RUN apt-get update && \ apt-get install -y build-essential libsdl2-dev wget cmake git \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* # Ref: https://stackoverflow.com/a/53464012 @@ -25,7 +24,14 @@ ENV CUDA_MAIN_VERSION=12.3 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH COPY .. . -RUN make base.en +# Enable cuBLAS +RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1" + +RUN find /app/build -name "*.o" -delete && \ + find /app/build -name "*.a" -delete && \ + rm -rf /app/build/CMakeFiles && \ + rm -rf /app/build/cmake_install.cmake && \ + rm -rf /app/build/_deps FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime ENV CUDA_MAIN_VERSION=12.3 @@ -34,7 +40,11 @@ WORKDIR /app RUN apt-get update && \ apt-get install -y curl ffmpeg wget cmake git \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* COPY --from=build /app /app +RUN du -sh /app/* +RUN find /app -type f -size +100M +ENV PATH=/app/build/bin:$PATH ENTRYPOINT [ "bash", "-c" ] diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile new file mode 100644 index 00000000000..1b5859715d4 --- /dev/null +++ b/.devops/main-intel.Dockerfile @@ -0,0 +1,28 @@ +ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04 + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y build-essential libsdl2-dev wget cmake git \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* + +COPY .. . +# Enable SYCL +ARG GGML_SYCL_F16=OFF +RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ + echo "GGML_SYCL_F16 is set" \ + && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + fi && \ + make base.en CMAKE_ARGS="-DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16}" + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \ + && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* + +COPY --from=build /app /app +ENV PATH=/app/build/bin:$PATH +ENTRYPOINT [ "bash", "-c" ] diff --git a/.devops/main-musa.Dockerfile b/.devops/main-musa.Dockerfile new file mode 100644 index 00000000000..bbc33993881 --- /dev/null +++ b/.devops/main-musa.Dockerfile @@ -0,0 +1,39 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc4.0.1 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION} +# Target the MUSA runtime image +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y build-essential libsdl2-dev wget cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/* + +COPY .. . 
+# Enable muBLAS +RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1" + +RUN find /app/build -name "*.o" -delete && \ + find /app/build -name "*.a" -delete && \ + rm -rf /app/build/CMakeFiles && \ + rm -rf /app/build/cmake_install.cmake && \ + rm -rf /app/build/_deps + +FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y curl ffmpeg wget cmake git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/* + +COPY --from=build /app /app +RUN du -sh /app/* +RUN find /app -type f -size +100M +ENV PATH=/app/build/bin:$PATH +ENTRYPOINT [ "bash", "-c" ] diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile index e8424126057..e1eb9b33700 100644 --- a/.devops/main.Dockerfile +++ b/.devops/main.Dockerfile @@ -16,4 +16,5 @@ RUN apt-get update && \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* COPY --from=build /app /app +ENV PATH=/app/build/bin:$PATH ENTRYPOINT [ "bash", "-c" ] diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..7c5e2438812 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +build*/ +.github/ +.devops/ \ No newline at end of file diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml index 63f7f61533c..680862fb764 100644 --- a/.github/workflows/bindings-ruby.yml +++ b/.github/workflows/bindings-ruby.yml @@ -1,55 +1,11 @@ name: Bindings Tests (Ruby) + on: push: - paths: - - bindings/ruby/** - - src/**/*.c - - src/**/*.cpp - - src/**/*.h - - src/**/*.m - - src/**/*.metal - - include/**/*.c - - include/**/*.cpp - - include/**/*.h - - include/**/*.m - - include/**/*.metal - - ggml/**/*.c - - ggml/**/*.cpp - - ggml/**/*.h - - ggml/**/*.m - - ggml/**/*.metal - - scripts/get-flags.mk - - examples/common.h - - examples/common.cpp - - examples/common-whisper.h - - examples/common-whisper.cpp - - examples/stb_vorbis.c - - examples/miniaudio.h + branches: + - master pull_request: - paths: - - bindings/ruby/** - - src/**/*.c - - src/**/*.cpp - - src/**/*.h - - src/**/*.m - - src/**/*.metal - - include/**/*.c - - include/**/*.cpp - - include/**/*.h - - include/**/*.m - - include/**/*.metal - - ggml/**/*.c - - ggml/**/*.cpp - - ggml/**/*.h - - ggml/**/*.m - - ggml/**/*.metal - - scripts/get-flags.mk - - examples/common.h - - examples/common.cpp - - examples/common-whisper.h - - examples/common-whisper.cpp - - examples/stb_vorbis.c - - examples/miniaudio.h + types: [opened, synchronize, reopened] jobs: ubuntu-22: @@ -60,6 +16,6 @@ jobs: steps: - uses: ruby/setup-ruby@v1 with: - ruby-version: '3.1' + ruby-version: '3.2' - uses: actions/checkout@v4 - run: rake test diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 790a5d200c0..3568db32b71 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -101,6 +101,10 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + apt update apt install -y build-essential libsdl2-dev cmake git cmake -B build @@ -129,6 +133,14 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" 
/etc/apt/sources.list + + apt-get update + apt-get install -y ca-certificates + sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list + apt update apt install -y build-essential libsdl2-dev cmake git cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a @@ -157,6 +169,14 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + + apt-get update + apt-get install -y ca-certificates + sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list + apt update apt install -y build-essential libsdl2-dev cmake git cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp @@ -200,23 +220,23 @@ jobs: cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - freeBSD-latest: - runs-on: macos-13 - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: Build - uses: cross-platform-actions/action@v0.27.0 - with: - operating_system: freebsd - version: '14.2' - run: | - sudo pkg update - sudo pkg install -y gmake sdl2 cmake git - cmake -B build - cmake --build build --config Release +# freeBSD-latest: +# runs-on: macos-13 +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Build +# uses: cross-platform-actions/action@v0.27.0 +# with: +# operating_system: freebsd +# version: '14.2' +# run: | +# sudo pkg update +# sudo pkg install -y gmake sdl2 cmake git +# cmake -B build +# cmake --build build --config Release ubuntu-22-gcc: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -242,6 +262,10 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + apt update apt install -y build-essential cmake libsdl2-dev git cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} @@ -272,6 +296,14 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + + apt-get update + apt-get install -y ca-certificates + sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list + apt update apt install -y build-essential cmake libsdl2-dev git cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a @@ -302,6 +334,14 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + + apt-get update + apt-get install -y ca-certificates + sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list + apt update apt install -y build-essential cmake libsdl2-dev git cmake . 
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp @@ -335,6 +375,14 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + + apt-get update + apt-get install -y ca-certificates + sed -i "s|http://ports.ubuntu.com|https://mirror.kumi.systems|g" /etc/apt/sources.list + apt update apt install -y clang build-essential cmake libsdl2-dev git cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang @@ -365,6 +413,10 @@ jobs: -v ${{ github.workspace }}:/workspace \ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' set -e + export DEBIAN_FRONTEND=noninteractive + sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list + apt update apt install -y build-essential cmake git cmake . -DCMAKE_BUILD_TYPE=Debug \ @@ -561,6 +613,7 @@ jobs: run: > cmake -S . -B ./build -A ${{ matrix.arch }} -DCMAKE_BUILD_TYPE=${{ matrix.build }} + -DBUILD_SHARED_LIBS=ON -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -572,18 +625,50 @@ jobs: if: matrix.sdl2 == 'ON' run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - name: Upload dll + - name: Upload SDL2.dll + if: matrix.sdl2 == 'ON' + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.s2arc }}_SDL2.dll + path: build/bin/${{ matrix.build }}/SDL2.dll + + - name: Upload whisper dll uses: actions/upload-artifact@v4 with: - name: ${{ matrix.jnaPath }}_whisper.dll + name: whisper_${{ matrix.arch }}.dll path: build/bin/${{ matrix.build }}/whisper.dll + - name: Upload ggml dll + uses: actions/upload-artifact@v4 + with: + name: ggml_${{ matrix.arch }}.dll + path: build/bin/${{ matrix.build }}/ggml.dll + + - name: Upload ggml base dll + uses: actions/upload-artifact@v4 + with: + name: ggml_base_${{ matrix.arch }}.dll + path: build/bin/${{ matrix.build }}/ggml-base.dll + + - name: Upload ggml cpu dll + uses: actions/upload-artifact@v4 + with: + name: ggml_cpu_${{ matrix.arch }}.dll + path: build/bin/${{ matrix.build }}/ggml-cpu.dll + + - name: Pack bin artifacts + shell: pwsh + run: | + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" + - name: Upload binaries - if: matrix.sdl2 == 'ON' + if: matrix.sdl2 == 'ON' && ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || + github.event.inputs.create_release == 'true' || + github.event.inputs.pre_release_tag != '' }} uses: actions/upload-artifact@v4 with: - name: whisper-bin-${{ matrix.arch }} - path: build/bin/${{ matrix.build }} + name: whisper-bin-${{ matrix.arch }}.zip + path: whisper-bin-${{ matrix.arch }}.zip windows-blas: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -596,11 +681,14 @@ jobs: arch: [Win32, x64] blas: [ON] sdl2: [ON] + blasver: [0.3.29] include: - arch: Win32 s2arc: x86 + blasfile: x86 - arch: x64 s2arc: x64 + blasfile: x64_64 - sdl2: ON s2ver: 2.28.5 @@ -621,7 +709,8 @@ jobs: - name: Install OpenBLAS and pkgconfiglite if: matrix.blas == 'ON' run: | - vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas + Invoke-WebRequest 
"https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip" + Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}" choco install pkgconfiglite - name: Fetch SDL2 and set SDL2_DIR @@ -638,6 +727,8 @@ jobs: -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_BLAS=${{ matrix.blas }} -DGGML_BLAS_VENDOR=OpenBLAS + -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib" + -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include" -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -647,30 +738,38 @@ jobs: - name: Copy openblas.dll if: matrix.blas == 'ON' - run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }} + run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }} - name: Copy SDL2.dll if: matrix.sdl2 == 'ON' run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + - name: Pack bin artifacts + shell: pwsh + run: | + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" + - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' + if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || + github.event.inputs.create_release == 'true' || + github.event.inputs.pre_release_tag != '' }} uses: actions/upload-artifact@v4 with: - name: whisper-blas-bin-${{ matrix.arch }} - path: build/bin/${{ matrix.build }} + name: whisper-blas-bin-${{ matrix.arch }}.zip + path: whisper-blas-bin-${{ matrix.arch }}.zip windows-cublas: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-2019 + runs-on: windows-2022 strategy: + fail-fast: false matrix: build: [Release] arch: [x64] cublas: [ON] sdl2: [ON] - cuda-toolkit: [12.2.0, 11.8.0] + cuda-toolkit: [12.4.0, 11.8.0] include: - arch: x64 sdl2: ON @@ -738,7 +837,7 @@ jobs: xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y + xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y # Set environment variables echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append @@ -746,23 +845,23 @@ jobs: echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - name: Install Cuda Toolkit 12.2.0 - if: ${{ matrix.cuda-toolkit == '12.2.0' }} + - name: Install Cuda Toolkit 12.4.0 + if: ${{ matrix.cuda-toolkit == '12.4.0' }} run: | $CUDA_VERSION = ${{ matrix.cuda-toolkit }} $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION" $CUDA_DOWNLOAD = 
"https://developer.download.nvidia.com/compute/cuda/redist" # Components versions - $CUDART_VER = "12.2.140" - $NVCC_VER = "12.2.140" - $NVRTC_VER = "12.2.140" - $CUBLAS_VER = "12.2.5.6" - $NVTX_VER = "12.2.140" - $PROFILER_VER = "12.2.140" - $VS_VER = "12.2.140" - $NVPROF_VER = "12.2.142" - $CCCL_VER = "12.2.140" + $CUDART_VER = "12.4.127" + $NVCC_VER = "12.4.131" + $NVRTC_VER = "12.4.127" + $CUBLAS_VER = "12.4.5.8" + $NVTX_VER = "12.4.127" + $PROFILER_VER = "12.4.127" + $VS_VER = "12.4.127" + $NVPROF_VER = "12.4.128" + $CCCL_VER = "12.4.127" # Create the directory where the CUDA Toolkit will be installed mkdir -p $CUDA_TOOLKIT_DIR @@ -796,7 +895,7 @@ jobs: xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations" /E /I /H /Y + xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y # Set environment variables echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append @@ -824,14 +923,21 @@ jobs: - name: Build Project shell: cmd run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" cmake --version where cmake + if "${{ matrix.cuda-toolkit }}" == "11.8.0" ( + set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR + ) else ( + set CUDA_FLAGS= + ) cmake -S . 
-B build -G "Ninja Multi-Config" ^ -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^ -DGGML_CUDA=${{ matrix.cublas }} ^ -DWHISPER_SDL2=${{ matrix.sdl2 }} ^ - -DSDL2_DIR="%SDL2_DIR%" + -DSDL2_DIR="%SDL2_DIR%" ^ + -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^ + -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%" set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS% @@ -848,11 +954,19 @@ jobs: if: matrix.sdl2 == 'ON' run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }} + - name: Pack bin artifacts + shell: pwsh + run: | + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip" + - name: Upload binaries + if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || + github.event.inputs.create_release == 'true' || + github.event.inputs.pre_release_tag != '' }} uses: actions/upload-artifact@v4 with: - name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }} - path: build/bin/${{ matrix.build }} + name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip + path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip emscripten: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -938,7 +1052,7 @@ jobs: uses: actions/upload-artifact@v4 with: path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip - name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework + name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip android: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -996,38 +1110,93 @@ jobs: chmod +x ./gradlew ./gradlew assembleRelease -# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598 -# java: -# needs: [ 'windows' ] -# runs-on: windows-latest -# steps: -# - uses: actions/checkout@v4 -# -# - name: Install Java -# uses: actions/setup-java@v4 -# with: -# distribution: zulu -# java-version: 20 -# -# - name: Download Windows lib -# uses: actions/download-artifact@v4 -# with: -# name: win32-x86-64_whisper.dll -# path: bindings/java/build/generated/resources/main/win32-x86-64 -# -# - name: Build -# run: | -# models\download-ggml-model.cmd tiny.en -# cd bindings/java -# chmod +x ./gradlew -# ./gradlew build -# -# - name: Upload jar -# uses: actions/upload-artifact@v4 -# with: -# name: whispercpp.jar -# path: bindings/java/build/libs/whispercpp-*.jar -# + bindings-java: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + needs: ['windows'] + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Java + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: 20 + + - name: Download Whisper Windows lib + uses: actions/download-artifact@v4 + with: + name: whisper_x64.dll + + - name: Download GGML Windows lib + uses: actions/download-artifact@v4 + with: + name: ggml_x64.dll + + - name: Download GGML Base Windows lib + uses: actions/download-artifact@v4 + with: + name: ggml_base_x64.dll + + - name: Download GGML CPU Windows lib + uses: actions/download-artifact@v4 + with: + name: ggml_cpu_x64.dll + + - name: Download SDL2.dll + uses: actions/download-artifact@v4 + with: + name: x64_SDL2.dll + + - name: List downloaded files + shell: pwsh + run: | + Get-ChildItem -Path "." 
-Recurse -Filter "*.dll" + + - name: Move DLL to correct location + shell: pwsh + run: | + New-Item -Path "build\bin\Release" -ItemType Directory -Force + + Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force + Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory" + + Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force + Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory" + + Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force + Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory" + + Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force + Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory" + + Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force + Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory" + + - name: List build release files + shell: pwsh + run: | + Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll" + + - name: Build + run: | + models\download-ggml-model.cmd tiny.en models/ + cd bindings/java + chmod +x ./gradlew + ./gradlew build --info + + - name: Pack jar artifacts + shell: pwsh + run: | + Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip" + + - name: Upload jar + uses: actions/upload-artifact@v4 + with: + name: whispercpp.jar.zip + path: whispercpp.jar.zip + # - name: Publish package # if: ${{ github.ref == 'refs/heads/master' }} # uses: gradle/gradle-build-action@v2.4.2 @@ -1064,6 +1233,9 @@ jobs: needs: - determine-tag - ios-xcode-build + - windows + - windows-blas + - windows-cublas steps: - name: Clone @@ -1147,3 +1319,23 @@ jobs: source venv/bin/activate pip install ane_transformers openai-whisper coremltools ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }} + + vad: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build + shell: bash + run: | + cmake -B build + cmake --build build --config Release + + - name: Test + shell: bash + run: | + ctest -R ^test-vad$ --test-dir build --output-on-failure -VV diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 55f75f0c83e..c5e9e90d71b 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -18,9 +18,9 @@ jobs: matrix: config: - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" } - #TODO: the cuda image keeps failing - disable for now - # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339 - #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } + - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" } + - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" } + - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } steps: - name: Check out the repo diff --git a/.gitignore b/.gitignore index 91368ec577b..0957376dd8b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ build/ build-*/ +build_*/ # SPM .build/ @@ -49,6 +50,8 @@ extra/bench-gg.txt models/*.mlmodel models/*.mlmodelc models/*.mlpackage +models/*-encoder-openvino.xml +models/*-encoder-openvino-cache/ bindings/java/.gradle/ 
bindings/java/.idea/ .idea/ diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/CMakeLists.txt b/CMakeLists.txt index be6db903c4a..36eef350c09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories. project("whisper.cpp" C CXX) -project("whisper.cpp" VERSION 1.7.5) +project("whisper.cpp" VERSION 1.7.6) include(CheckIncludeFileCXX) set(SOVERSION 1) @@ -59,9 +59,6 @@ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) # option list # -# general -option(WHISPER_CCACHE "whisper: use ccache if available" ON) - # debug option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON) option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF) @@ -96,7 +93,6 @@ option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) # override ggml options -set(GGML_CCACHE ${WHISPER_CCACHE}) set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD}) set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS}) set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED}) @@ -121,6 +117,12 @@ whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP) whisper_option_depr(WARNING WHISPER_RPC GGML_RPC) whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL) whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16) +whisper_option_depr(WARNING WHISPER_CCACHE GGML_CCACHE) + +if (GGML_CUDA AND NOT MSVC) + # GGML_CUDA is enabled, so add the necessary -Wno-deprecated-gpu-targets compile option + add_compile_options(-Wno-deprecated-gpu-targets) +endif() # # build the library # @@ -135,6 +137,22 @@ if (NOT TARGET ggml) add_library(ggml ALIAS ggml::ggml) else() add_subdirectory(ggml) + if(WIN32) + # The following adds a _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR macro and is a workaround for + # the Windows C++ standard library which does not support constexpr mutexes. + # From the release notes: https://github.com/microsoft/STL/wiki/Changelog + # Disable constexpr mutex constructor on Windows + # Fixed mutex's constructor to be constexpr. #3824 #4000 #4339 + # Note: Programs that aren't following the documented restrictions on binary compatibility may encounter + # null dereferences in mutex machinery. You must follow this rule: + # When you mix binaries built by different supported versions of the toolset, the Redistributable version + # must be at least as new as the latest toolset used by any app component. + # You can define _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR as an escape hatch. + # + # Specifically for whisper.cpp, this caused a crash when using the Java bindings, + # resulting in an Invalid memory access error. + target_compile_definitions(ggml-base PRIVATE _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) + endif() endif() # ... otherwise assume ggml is added by a parent CMakeLists.txt endif()
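For anyone who hits the same constexpr-mutex crash in a downstream build, the escape hatch above can also be applied at configure time without patching CMakeLists.txt. This is a sketch, not part of this change, and assumes an MSVC toolchain:

```bash
# Pass the STL escape hatch as a preprocessor define at configure time (MSVC /D syntax)
cmake -B build -DCMAKE_CXX_FLAGS="/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"
cmake --build build --config Release
```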
@@ -197,3 +215,37 @@ endif () if (WHISPER_BUILD_EXAMPLES) add_subdirectory(examples) endif() + +if (MSVC) + set(MSVC_WARNING_FLAGS + /wd4101 # Unreferenced local variable + /wd4005 # Macro redefinition + /wd4065 # switch statement contains 'default' but no 'case' labels + /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4244 # Conversion from one type to another type, possible loss of data + /wd4805 # Unsafe mix of types + /wd4305 # Truncation from 'type1' to 'type2' (often double to float) + /wd4996 # Function or variable may be unsafe/deprecated + ) + function(disable_msvc_warnings target_name) + if(TARGET ${target_name}) + target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS}) + endif() + endfunction() + + if (WHISPER_BUILD_EXAMPLES) + disable_msvc_warnings(whisper) + disable_msvc_warnings(common) + disable_msvc_warnings(common-sdl) + disable_msvc_warnings(lsp) + disable_msvc_warnings(wchess-core) + disable_msvc_warnings(whisper-command) + disable_msvc_warnings(whisper-cli) + disable_msvc_warnings(whisper-server) + disable_msvc_warnings(whisper-stream) + disable_msvc_warnings(whisper-talk-llama) + disable_msvc_warnings(whisper-bench) + disable_msvc_warnings(quantize) + disable_msvc_warnings(vad-speech-segments) + endif() +endif() diff --git a/Makefile b/Makefile index dbda58acc44..97a26d48f92 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ .PHONY: build build: - cmake -B build + cmake -B build $(CMAKE_ARGS) cmake --build build --config Release # download a few audio samples into folder "./samples": @@ -41,17 +41,17 @@ samples: tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: bash ./models/download-ggml-model.sh $@ - cmake -B build + cmake -B build $(CMAKE_ARGS) cmake --build build --config Release @echo "" @echo "===============================================" @echo "Running $@ on all samples in ./samples ..." @echo "===============================================" @echo "" - @for f in samples/*$(.flac .mp3 .ogg .wav); do \ + @for f in samples/*.{flac,mp3,ogg,wav}; do \ echo "----------------------------------------------" ; \ echo "[+] Running $@ on $$f ... 
(run 'ffplay $$f' to listen)" ; \ - echo "----------------------------------------------" ; \ + echo "----------------------------------------------" ; \ echo "" ; \ ./build/bin/whisper-cli -m models/ggml-$@.bin -f $$f ; \ echo "" ; \ diff --git a/README.md b/README.md index b4b3ec79e8f..2e92a27f07c 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,12 @@ ![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg) -[![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions) +[![Actions Status](https://github.com/ggml-org/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggml-org/whisper.cpp/actions) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp) [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/) -> [!NOTE] -> New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788 - -Stable: [v1.7.5](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.5) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126) +Stable: [v1.7.6](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.7.6) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/) High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model: @@ -26,7 +23,9 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp - [Efficient GPU support for NVIDIA](#nvidia-gpu-support) - [OpenVINO Support](#openvino-support) - [Ascend NPU Support](#ascend-npu-support) -- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h) +- [Moore Threads GPU Support](#moore-threads-gpu-support) +- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h) +- [Voice Activity Detection (VAD)](#voice-activity-detection-vad) Supported platforms: @@ -34,14 +33,14 @@ Supported platforms: - [x] [iOS](examples/whisper.objc) - [x] [Android](examples/whisper.android) - [x] [Java](bindings/java/README.md) -- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264) +- [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264) - [x] [WebAssembly](examples/whisper.wasm) -- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)] -- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166) -- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp) +- [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168)) +- [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166) +- [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp) The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp). -The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library. 
+The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library. Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications. As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc) @@ -54,14 +53,14 @@ https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a On Apple Silicon, the inference runs fully on the GPU via Metal: -https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 +https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 ## Quick start First clone the repository: ```bash -git clone https://github.com/ggerganov/whisper.cpp.git +git clone https://github.com/ggml-org/whisper.cpp.git ``` Navigate into the directory: @@ -152,6 +151,7 @@ standard cmake setup with: cmake -B build -DGGML_BLAS=1 cmake --build build --config Release ./build/bin/whisper-cli [ .. etc .. ] +``` ## Quantization @@ -225,7 +225,7 @@ speed-up - more than x3 faster compared with CPU-only execution. Here are the in The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format. Next runs are faster. -For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566). +For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566). ## OpenVINO support @@ -267,7 +267,7 @@ This can result in significant speedup in encoder performance. Here are the inst - Build `whisper.cpp` with OpenVINO support: - Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0). + Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready-to-use binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/) After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example: @@ -310,7 +310,7 @@ This can result in significant speedup in encoder performance. Here are the inst The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get cached for the next run. -For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037). +For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037). ## NVIDIA GPU support @@ -324,6 +324,12 @@ cmake -B build -DGGML_CUDA=1 cmake --build build -j --config Release ``` +or for newer NVIDIA GPUs (RTX 5000 series): +``` +cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86" +cmake --build build -j --config Release +``` + ## Vulkan GPU support Cross-vendor solution which allows you to accelerate workload on your GPU. First, make sure your graphics card driver provides support for Vulkan API. 
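To make the Vulkan path above concrete, a minimal configure-and-run sketch follows; it assumes the `GGML_VULKAN` flag, named after the same `GGML_*` convention as the CUDA flag shown here:

```bash
# Configure with the Vulkan backend and build in Release mode
cmake -B build -DGGML_VULKAN=1
cmake --build build -j --config Release

# Run as usual; the Vulkan device is selected at runtime
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
```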
@@ -377,6 +383,56 @@ Run the inference examples as usual, for example: - If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag. - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. +## Moore Threads GPU support + +With Moore Threads cards, the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels. +First, make sure you have installed `MUSA SDK rc4.0.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.0.1 + +Now build `whisper.cpp` with MUSA support: + +``` +cmake -B build -DGGML_MUSA=1 +cmake --build build -j --config Release +``` + +or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows: + +``` +cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21" +cmake --build build -j --config Release +``` + +## FFmpeg support (Linux only) + +If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration. + +First, you need to install the required libraries: + +```bash +# Debian/Ubuntu +sudo apt install libavcodec-dev libavformat-dev libavutil-dev + +# RHEL/Fedora +sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel +``` + +Then you can build the project as follows: + +```bash +cmake -B build -D WHISPER_FFMPEG=yes +cmake --build build +``` + +Run the following example to confirm it's working: + +```bash +# Convert an audio file to Opus format +ffmpeg -i samples/jfk.wav jfk.opus + +# Transcribe the audio file +./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus +``` + ## Docker ### Prerequisites @@ -388,8 +444,9 @@ Run the inference examples as usual, for example: We have two Docker images available for this project: -1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) -2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) +1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) +2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) +3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`) ### Usage @@ -402,11 +459,11 @@ docker run -it --rm \ -v path/to/models:/models \ docker run -it --rm \ -v path/to/models:/models \ -v path/to/audios:/audios \ - whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav" + whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav" # transcribe an audio file in samples folder docker run -it --rm \ -v path/to/models:/models \ - whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav" + whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav" ``` ## Installing with Conan @@ -427,8 +484,8 @@ For detailed instructions on how to use Conan, please refer to the [Conan docume This is a naive example of performing real-time inference on audio from your microphone. The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously. -More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10). 
-You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly. +More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10). +You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly. ```bash cmake -B build -DWHISPER_SDL2=ON @@ -516,7 +573,7 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr ## Speaker segmentation via tinydiarize (experimental) -More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058 +More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058 Sample usage: @@ -543,7 +600,7 @@ main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 pr ## Karaoke-style movie generation (experimental) The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the -currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script. +currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script. This requires to have `ffmpeg` installed. Here are a few _"typical"_ examples: @@ -580,7 +637,7 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a ## Video comparison of different models -Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: +Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: ```bash ./scripts/bench-wts.sh samples/jfk.wav @@ -597,7 +654,7 @@ In order to have an objective comparison of the performance of the inference acr use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it took to execute it. The results are summarized in the following Github issue: -[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89) +[Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89) Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py). @@ -624,25 +681,24 @@ You can download the converted models using the [models/download-ggml-model.sh]( or manually from here: - https://huggingface.co/ggerganov/whisper.cpp -- https://ggml.ggerganov.com For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md). 
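As a quick end-to-end sketch of the download step described above (the model name and sample file are illustrative):

```bash
# Fetch a converted model from Hugging Face using the script above
./models/download-ggml-model.sh base.en

# Transcribe a bundled sample with the downloaded model
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
```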
-## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings) +## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings) -- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310) -- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309) +- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310) +- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309) - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn) -- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312) +- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312) - [x] Java: - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni) -- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507) -- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313) +- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507) +- [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313) - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper) -- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422) +- [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422) - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net) - [NickDarvey/whisper](https://github.com/NickDarvey/whisper) -- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9) +- [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9) - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython) - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp) - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11) @@ -650,6 +706,118 @@ For more details, see the conversion script [models/convert-pt-to-ggml.py](model - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper) - [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity) +## XCFramework +The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS, +and macOS. It can be used in Swift projects without the need to compile the +library from source. For example, the v1.7.5 version of the XCFramework can be +used as follows: + +```swift +// swift-tools-version: 5.10 +// The swift-tools-version declares the minimum version of Swift required to build this package. 
+ +import PackageDescription + +let package = Package( + name: "Whisper", + targets: [ + .executableTarget( + name: "Whisper", + dependencies: [ + "WhisperFramework" + ]), + .binaryTarget( + name: "WhisperFramework", + url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip", + checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a" + ) + ] +) +``` + +## Voice Activity Detection (VAD) +Support for Voice Activity Detection (VAD) can be enabled using the `--vad` +argument to `whisper-cli`. In addition to this option, a VAD model is also +required. + +The way this works is that first the audio samples are passed through +the VAD model, which detects speech segments. Using this information, +only the speech segments that are detected are extracted from the original audio +input and passed to whisper for processing. This reduces the amount of audio +data that needs to be processed by whisper and can significantly speed up the +transcription process. + +The following VAD models are currently supported: + +### Silero-VAD +[Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model +written in Python that is fast and accurate. + +Models can be downloaded by running the following command on Linux or macOS: +```console +$ ./models/download-vad-model.sh silero-v5.1.2 +Downloading ggml model silero-v5.1.2 from 'https://huggingface.co/ggml-org/whisper-vad' ... +ggml-silero-v5.1.2.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s +Done! Model 'silero-v5.1.2' saved in '/path/models/ggml-silero-v5.1.2.bin' +You can now use it like this: + + $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v5.1.2.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin + +``` +And the following command on Windows: +```console +> .\models\download-vad-model.cmd silero-v5.1.2 +Downloading vad model silero-v5.1.2... +Done! Model silero-v5.1.2 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v5.1.2.bin +You can now use it like this: + +C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v5.1.2.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav + +``` + +To see a list of all available models, run the above commands without any +arguments. + +This model can also be converted manually to ggml using the following command: +```console +$ python3 -m venv venv && source venv/bin/activate +(venv) $ pip install silero-vad +(venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin +Saving GGML Silero-VAD model to models/silero-v5.1.2-ggml.bin +``` +And it can then be used with whisper as follows: +```console +$ ./build/bin/whisper-cli \ + --file ./samples/jfk.wav \ + --model ./models/ggml-base.en.bin \ + --vad \ + --vad-model ./models/silero-v5.1.2-ggml.bin +``` + +### VAD Options + +* --vad-threshold: Threshold probability for speech detection. A probability +for a speech segment/frame above this threshold will be considered as speech. + +* --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech +segments shorter than this value will be discarded to filter out brief noise or +false positives. + +* --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence +periods must be at least this long to end a speech segment. Shorter silence +periods will be ignored and included as part of the speech. + +* --vad-max-speech-duration-s: Maximum speech duration in seconds. 
Speech segments +longer than this will be automatically split into multiple segments at silence +points exceeding 98ms to prevent excessively long segments. + +* --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding +before and after each detected speech segment to avoid cutting off speech edges. + +* --vad-samples-overlap: Amount of audio to extend from each speech segment into +the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't +cut off abruptly between segments when they're concatenated together. + ## Examples There are various examples of using the library for different projects in the [examples](examples) folder. @@ -668,13 +836,13 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch | [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp | | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim | | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture | -| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) | +| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185) | | [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) | | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess | -## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions) +## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions) If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic. -You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category +You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the -[Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion. +[Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion. diff --git a/README_sycl.md b/README_sycl.md index 9ea2a7908ab..2d31d284e5a 100644 --- a/README_sycl.md +++ b/README_sycl.md @@ -1,249 +1,249 @@ -# whisper.cpp for SYCL - -[Background](#background) - -[OS](#os) - -[Intel GPU](#intel-gpu) - -[Linux](#linux) - -[Environment Variable](#environment-variable) - -[Known Issue](#known-issue) - -[Todo](#todo) - -## Background - -SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17. - -oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms. - -Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs. - -To avoid re-inventing the wheel, this code refers other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). 
We use a open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) migrate to SYCL. - -The whisper.cpp for SYCL is used to support Intel GPUs. - -For Intel CPU, recommend to use whisper.cpp for X86 (Intel MKL build). - -## OS - -|OS|Status|Verified| -|-|-|-| -|Linux|Support|Ubuntu 22.04| -|Windows|Ongoing| | - - -## Intel GPU - -|Intel GPU| Status | Verified Model| -|-|-|-| -|Intel Data Center Max Series| Support| Max 1550| -|Intel Data Center Flex Series| Support| Flex 170| -|Intel Arc Series| Support| Arc 770| -|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake| -|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7| - - -## Linux - -### Setup Environment - -1. Install Intel GPU driver. - -a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html). - -Note: for iGPU, please install the client GPU driver. - -b. Add user to group: video, render. - -``` -sudo usermod -aG render username -sudo usermod -aG video username -``` - -Note: re-login to enable it. - -c. Check - -``` -sudo apt install clinfo -sudo clinfo -l -``` - -Output (example): - -``` -Platform #0: Intel(R) OpenCL Graphics - `-- Device #0: Intel(R) Arc(TM) A770 Graphics - - -Platform #0: Intel(R) OpenCL HD Graphics - `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49] -``` - -2. Install Intel® oneAPI Base toolkit. - - -a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html). - -Recommend to install to default folder: **/opt/intel/oneapi**. - -Following guide use the default folder as example. If you use other folder, please modify the following guide info with your folder. - -b. Check - -``` -source /opt/intel/oneapi/setvars.sh - -sycl-ls -``` - -There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**. - -Output (example): -``` -[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000] -[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] -[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50] -[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918] - -``` - -2. Build locally: - -``` -mkdir -p build -cd build -source /opt/intel/oneapi/setvars.sh - -#for FP16 -#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON - -#for FP32 -cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx - -#build example/main only -#cmake --build . --config Release --target main - -#build all binary -cmake --build . --config Release -v - -``` - -or - -``` -./examples/sycl/build.sh -``` - -Note: - -- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only. - -### Run - -1. Put model file to folder **models** - -2. Enable oneAPI running environment - -``` -source /opt/intel/oneapi/setvars.sh -``` - -3. 
List device ID - -Run without parameter: - -``` -./build/bin/ls-sycl-device - -or - -./build/bin/main -``` - -Check the ID in startup log, like: - -``` -found 4 SYCL devices: - Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3, - max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136 - Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2, - max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280 - Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0, - max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280 - Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0, - max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136 - -``` - -|Attribute|Note| -|-|-| -|compute capability 1.3|Level-zero running time, recommended | -|compute capability 3.0|OpenCL running time, slower than level-zero in most cases| - -4. Set device ID and execute whisper.cpp - -Set device ID = 0 by **GGML_SYCL_DEVICE=0** - -``` -GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav -``` -or run by script: - -``` -./examples/sycl/run_whisper.sh -``` - - - -5. Check the device ID in output - -Like: -``` -Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device -``` - - -## Environment Variable - -#### Build - -|Name|Value|Function| -|-|-|-| -|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path.
For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
-|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|
-|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
-|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
-
-#### Running
-
-
-|Name|Value|Function|
-|-|-|-|
-|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
-|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
-
-## Known Issue
-
-- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
-
-  Miss to enable oneAPI running environment.
-
-  Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
-
-
-- Hang during startup
-
-  llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
-
-  Solution: add **--no-mmap**.
-
-## Todo
-
-- Support to build in Windows.
-
-- Support multiple cards.
\ No newline at end of file
+# whisper.cpp for SYCL
+
+[Background](#background)
+
+[OS](#os)
+
+[Intel GPU](#intel-gpu)
+
+[Linux](#linux)
+
+[Environment Variable](#environment-variable)
+
+[Known Issue](#known-issue)
+
+[Todo](#todo)
+
+## Background
+
+SYCL is a higher-level programming model that improves programming productivity on various hardware accelerators such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
+
+oneAPI is an open, standards-based specification that supports multiple architecture types, including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
+
+Intel uses SYCL as a direct programming language to support CPUs, GPUs, and FPGAs.
+
+To avoid re-inventing the wheel, this code follows other code paths in llama.cpp (such as OpenBLAS, cuBLAS, and CLBlast). We used the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate the code to SYCL.
+
+whisper.cpp for SYCL supports Intel GPUs.
+
+For Intel CPUs, we recommend using the x86 build of whisper.cpp (Intel MKL build).
+
+## OS
+
+|OS|Status|Verified|
+|-|-|-|
+|Linux|Support|Ubuntu 22.04|
+|Windows|Ongoing| |
+
+
+## Intel GPU
+
+|Intel GPU| Status | Verified Model|
+|-|-|-|
+|Intel Data Center Max Series| Support| Max 1550|
+|Intel Data Center Flex Series| Support| Flex 170|
+|Intel Arc Series| Support| Arc 770|
+|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
+|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
+
+
+## Linux
+
+### Setup Environment
+
+1. Install the Intel GPU driver.
+
+a. Please install the Intel GPU driver following the official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
+
+Note: for iGPU, please install the client GPU driver.
+
+b. Add your user to the video and render groups:
+
+```
+sudo usermod -aG render username
+sudo usermod -aG video username
+```
+
+Note: re-login for the change to take effect.
+
+c. Check:
+
+```
+sudo apt install clinfo
+sudo clinfo -l
+```
+
+Output (example):
+
+```
+Platform #0: Intel(R) OpenCL Graphics
+ `-- Device #0: Intel(R) Arc(TM) A770 Graphics
+
+
+Platform #0: Intel(R) OpenCL HD Graphics
+ `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
+```
+
+2. Install the Intel® oneAPI Base Toolkit.
+
+
+a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
+
+We recommend installing to the default folder: **/opt/intel/oneapi**.
+
+The following guide uses the default folder as an example. If you installed to a different folder, adjust the paths accordingly.
+
+b. Check:
+
+```
+source /opt/intel/oneapi/setvars.sh
+
+sycl-ls
+```
+
+There should be one or more Level-Zero devices, such as **[ext_oneapi_level_zero:gpu:0]**.
+
+Output (example):
+```
+[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
+[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
+[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
+[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
+
+```
+
+3. Build locally:
+
+```
+mkdir -p build
+cd build
+source /opt/intel/oneapi/setvars.sh
+
+#for FP16
+#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
+
+#for FP32
+cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+#build example/main only
+#cmake --build . --config Release --target main
+
+#build all binaries
+cmake --build . --config Release -v
+
+```
+
+or
+
+```
+./examples/sycl/build.sh
+```
+
+Note:
+
+- By default, all binaries are built, which takes more time. To reduce build time, we recommend building **example/main** only.
+
+### Run
+
+1. Put the model file in the **models** folder.
+
+2. Enable the oneAPI runtime environment:
+
+```
+source /opt/intel/oneapi/setvars.sh
+```
+
+3. List the device IDs.
+
+Run without parameters:
+
+```
+./build/bin/ls-sycl-device
+```
+
+or
+
+```
+./build/bin/main
+```
+
+Check the IDs in the startup log, for example:
+
+```
+found 4 SYCL devices:
+  Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
+    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
+  Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
+    max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
+  Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
+    max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
+  Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
+    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
+
+```
+
+|Attribute|Note|
+|-|-|
+|compute capability 1.3|Level-Zero runtime, recommended|
+|compute capability 3.0|OpenCL runtime, slower than Level-Zero in most cases|
+
+4. Set the device ID and execute whisper.cpp.
+
+Set device ID = 0 with **GGML_SYCL_DEVICE=0**:
+
+```
+GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
+```
+or run the script:
+
+```
+./examples/sycl/run_whisper.sh
+```
+
+
+5. Check the device ID in the output.
+
+For example:
+```
+Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
+```
+
+
+## Environment Variable
+
+#### Build
+
+|Name|Value|Function|
+|-|-|-|
+|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
+|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
+|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
+|CMAKE_CXX_COMPILER|icpx|Use icpx compiler for SYCL code path|
+
+#### Running
+
+
+|Name|Value|Function|
+|-|-|-|
+|GGML_SYCL_DEVICE|0 (default) or 1|Set the device ID to use. Check the device IDs in the startup output|
+|GGML_SYCL_DEBUG|0 (default) or 1|Enable debug logging via the GGML_SYCL_DEBUG macro|
+
+## Known Issue
+
+- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
+
+  The oneAPI runtime environment is not enabled.
+
+  Install the oneAPI Base Toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.
+
+
+- Hang during startup
+
+  llama.cpp uses mmap by default to read the model file and copy it to the GPU. On some systems, the memcpy may misbehave and block.
+
+  Solution: add **--no-mmap**.
+
+## Todo
+
+- Support building on Windows.
+
+- Support multiple cards.
diff --git a/bindings/go/README.md b/bindings/go/README.md
index cbd2a622874..9d832096512 100644
--- a/bindings/go/README.md
+++ b/bindings/go/README.md
@@ -51,7 +51,7 @@ func main() {
 In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
 
 ```bash
-git clone https://github.com/ggerganov/whisper.cpp.git
+git clone https://github.com/ggml-org/whisper.cpp.git
 cd whisper.cpp/bindings/go
 make test
 ```
@@ -98,7 +98,7 @@ The API Documentation:
 
 Getting help:
 
-  * Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
+  * Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312)
 
 ## License
 
diff --git a/bindings/go/doc.go b/bindings/go/doc.go
index dcc351f2732..a5dae9314b0 100644
--- a/bindings/go/doc.go
+++ b/bindings/go/doc.go
@@ -1,5 +1,5 @@
 /*
-github.com/ggerganov/whisper.cpp/bindings/go
+github.com/ggml-org/whisper.cpp/bindings/go
 provides a speech-to-text service bindings for the Go programming language.
 */
 package whisper
diff --git a/bindings/java/README.md b/bindings/java/README.md
index 568965493ed..90426997237 100644
--- a/bindings/java/README.md
+++ b/bindings/java/README.md
@@ -23,26 +23,42 @@ import io.github.ggerganov.whispercpp.WhisperCpp;
 public class Example {
 
     public static void main(String[] args) {
+
         WhisperCpp whisper = new WhisperCpp();
-        // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
-        // or you can provide the absolute path to the model file.
-        long context = whisper.initContext("base.en");
         try {
-            var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-            // custom configuration if required
-            whisperParams.temperature_inc = 0f;
+            // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
+            // or you can provide the absolute path to the model file.
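+            // note: the call below passes a relative path; it is resolved against
+            // the process's current working directory, so adjust it for your setup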
+            whisper.initContext("../ggml-base.en.bin");
+            WhisperFullParams.ByValue whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
-            var samples = readAudio(); // divide each value by 32767.0f
-            whisper.fullTranscribe(whisperParams, samples);
+            // custom configuration if required
+            //whisperParams.n_threads = 8;
+            whisperParams.temperature = 0.0f;
+            whisperParams.temperature_inc = 0.2f;
+            //whisperParams.language = "en";
+
+            float[] samples = readAudio(); // divide each value by 32767.0f
+            List<WhisperSegment> whisperSegmentList = whisper.fullTranscribeWithTime(whisperParams, samples);
 
-            int segmentCount = whisper.getTextSegmentCount(context);
-            for (int i = 0; i < segmentCount; i++) {
-                String text = whisper.getTextSegment(context, i);
-                System.out.println(segment.getText());
+            for (WhisperSegment whisperSegment : whisperSegmentList) {
+
+                long start = whisperSegment.getStart();
+                long end = whisperSegment.getEnd();
+
+                String text = whisperSegment.getSentence();
+
+                System.out.println("start: "+start);
+                System.out.println("end: "+end);
+                System.out.println("text: "+text);
+            }
+
+        } catch (IOException e) {
+            e.printStackTrace();
         } finally {
-            whisper.freeContext(context);
+            whisper.close();
         }
+
     }
 }
 ```
@@ -52,7 +68,7 @@ public class Example {
 In order to build, you need to have the JDK 8 or higher installed. Run the tests with:
 
 ```bash
-git clone https://github.com/ggerganov/whisper.cpp.git
+git clone https://github.com/ggml-org/whisper.cpp.git
 cd whisper.cpp/bindings/java
 
 ./gradlew build
diff --git a/bindings/java/build.gradle b/bindings/java/build.gradle
index eb1a5c0759e..30184eed7ea 100644
--- a/bindings/java/build.gradle
+++ b/bindings/java/build.gradle
@@ -27,23 +27,41 @@ sourceSets {
 tasks.register('copyLibwhisperDynlib', Copy) {
     from '../../build/src'
     include 'libwhisper.dylib'
-    into 'build/generated/resources/main/darwin'
+    into 'build/generated/resources/main'
 }
 
 tasks.register('copyLibwhisperSo', Copy) {
     from '../../build/src'
     include 'libwhisper.so'
-    into 'build/generated/resources/main/linux-x86-64'
+    into 'build/generated/resources/main'
 }
 
-tasks.register('copyWhisperDll', Copy) {
-    from '../../build/Release'
+tasks.register('copyWhisperDLL', Copy) {
+    from '../../build/bin/Release'
     include 'whisper.dll'
-    into 'build/generated/resources/main/windows-x86-64'
+    into 'build/generated/resources/main'
+}
+
+tasks.register('copyGGML_BASE_DLL', Copy) {
+    from '../../build/bin/Release'
+    include 'ggml-base.dll'
+    into 'build/generated/resources/main'
+}
+
+tasks.register('copyGGML_DLL', Copy) {
+    from '../../build/bin/Release'
+    include 'ggml.dll'
+    into 'build/generated/resources/main'
+}
+
+tasks.register('copyGGML_CPU_DLL', Copy) {
+    from '../../build/bin/Release'
+    include 'ggml-cpu.dll'
+    into 'build/generated/resources/main'
 }
 
 tasks.register('copyLibs') {
-    dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
+    dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDLL, copyGGML_BASE_DLL, copyGGML_DLL, copyGGML_CPU_DLL
 }
 
 test {
diff --git a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
index 621d8c636ca..cc5314829c6 100644
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
@@ -168,23 +168,26 @@ public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] au
         return str.toString().trim();
     }
 
-    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
+    /**
+     * Full transcribe with time list.
+     *
+     * @param whisperParams the whisper params
+     * @param audioData the audio data
+     * @return the list of transcribed segments with timestamps
+     * @throws IOException if audio processing fails
+     */
+    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
         if (ctx == null) {
             throw new IllegalStateException("Model not initialised");
         }
 
-        WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
-                lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
-        valueParams.read();
-
-        if (lib.whisper_full(ctx, valueParams, audioData, audioData.length) != 0) {
+        if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
             throw new IOException("Failed to process audio");
         }
 
         int nSegments = lib.whisper_full_n_segments(ctx);
         List<WhisperSegment> segments = new ArrayList<>(nSegments);
-
         for (int i = 0; i < nSegments; i++) {
             long t0 = lib.whisper_full_get_segment_t0(ctx, i);
             String text = lib.whisper_full_get_segment_text(ctx, i);
diff --git a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
index 1cd2449f534..690f1bd5258 100644
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
@@ -9,6 +9,7 @@ import io.github.ggerganov.whispercpp.params.WhisperFullParams;
 
 public interface WhisperCppJnaLibrary extends Library {
+
     WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
 
     String whisper_print_system_info();
diff --git a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
index 9d63fff34e6..bf37e519992 100644
--- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
+++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
@@ -118,7 +118,7 @@ void testFullTranscribeWithTime() throws Exception {
         float[] floats = new float[b.length / 2];
 
         //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
+        WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
         params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
         params.print_progress = CBool.FALSE;
         //params.initial_prompt = "and so my fellow Americans um, like";
diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json
index df72d132943..3d0e0710519 100644
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@@ -1,6 +1,6 @@
 {
   "name": "whisper.cpp",
-  "version": "1.7.5",
+  "version": "1.7.6",
   "description": "Whisper speech recognition",
   "main": "whisper.js",
   "scripts": {
diff --git a/bindings/ruby/.gitignore b/bindings/ruby/.gitignore
index e04a90a9c69..54e3a2ac184 100644
--- a/bindings/ruby/.gitignore
+++ b/bindings/ruby/.gitignore
@@ -1,3 +1,9 @@
 LICENSE
 pkg/
 lib/whisper.*
+ext/examples/
+ext/ggml/
+ext/include/
+ext/scripts/
+ext/src/
+test/fixtures/
diff --git a/bindings/ruby/README.md b/bindings/ruby/README.md
index f66d8d651e2..fff6efc7c5c 100644
--- a/bindings/ruby/README.md
+++ b/bindings/ruby/README.md
@@ -16,6 +16,32 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
     $ gem install whispercpp
 
+You can pass build options for whisper.cpp, for instance:
+
+    $ bundle config build.whispercpp --enable-ggml-cuda
+
+or,
+
+    $ gem install whispercpp -- --enable-ggml-cuda
+
+See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options presented in the README to Ruby-style options, for example:
+
+Boolean options:
+
+* `-DGGML_BLAS=1` -> `--enable-ggml-blas`
+* `-DWHISPER_COREML=OFF` -> `--disable-whisper-coreml`
+
+Argument options:
+
+* `-DGGML_CUDA_COMPRESSION_MODE=size` -> `--ggml-cuda-compression-mode=size`
+
+Combination:
+
+* `-DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"` -> `--enable-ggml-cuda --cmake-cuda-architectures="86"`
+
+For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`), and make it kebab-case: `--enable-ggml-cuda`.
+For options which require arguments like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, make it kebab-case, append `=`, and append the argument: `--cmake-cuda-architectures="86"`.
+
 Usage
 -----
 
@@ -44,17 +70,6 @@ end
 
 Some models are prepared up-front:
 
-```ruby
-base_en = Whisper::Model.pre_converted_models["base.en"]
-whisper = Whisper::Context.new(base_en)
-```
-
-At first time you use a model, it is downloaded automatically. After that, downloaded cached file is used. To clear cache, call `#clear_cache`:
-
-```ruby
-Whisper::Model.pre_converted_models["base"].clear_cache
-```
-
 You also can use shorthand for pre-converted models:
 
 ```ruby
@@ -79,6 +94,19 @@ puts Whisper::Model.pre_converted_models.keys
 # :
 ```
 
+You can also retrieve each model:
+
+```ruby
+base_en = Whisper::Model.pre_converted_models["base.en"]
+whisper = Whisper::Context.new(base_en)
+```
+
+The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
+
+```ruby
+Whisper::Model.pre_converted_models["base"].clear_cache
+```
+
 You can also use local model files you prepared:
 
 ```ruby
@@ -99,9 +127,80 @@ See [models][] page for details.
 
 Currently, whisper.cpp accepts only 16-bit WAV files.
 
+### Voice Activity Detection (VAD) ###
+
+Support for Voice Activity Detection (VAD) can be enabled by setting `Whisper::Params`'s `vad` argument to `true` and specifying a VAD model:
+
+```ruby
+Whisper::Params.new(
+  vad: true,
+  vad_model_path: "silero-v5.1.2",
+  # other arguments...
+)
+```
+
+When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically.
+Currently, "silero-v5.1.2" is registered as a pre-converted model, like the ASR models. You can also specify a file path or URI for the model.
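+
+For example, a minimal sketch passing a local file path instead of a registered model name (the path below is hypothetical):
+
+```ruby
+Whisper::Params.new(
+  vad: true,
+  vad_model_path: "path/to/ggml-silero-v5.1.2.bin",
+  # other arguments...
+)
+```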
+
+If you need to configure VAD behavior, pass params for it:
+
+```ruby
+Whisper::Params.new(
+  vad: true,
+  vad_model_path: "silero-v5.1.2",
+  vad_params: Whisper::VAD::Params.new(
+    threshold: 1.0, # defaults to 0.5
+    min_speech_duration_ms: 500, # defaults to 250
+    min_silence_duration_ms: 200, # defaults to 100
+    max_speech_duration_s: 30000, # defaults to FLT_MAX
+    speech_pad_ms: 50, # defaults to 30
+    samples_overlap: 0.5 # defaults to 0.1
+  ),
+  # other arguments...
+)
+```
+
+For details on VAD, see [whisper.cpp's README](https://github.com/ggml-org/whisper.cpp?tab=readme-ov-file#voice-activity-detection-vad).
+
+### Output ###
+
+whispercpp supports SRT and WebVTT output:
+
+```ruby
+puts whisper.transcribe("path/to/audio.wav", Whisper::Params.new).to_webvtt
+# =>
+WEBVTT
+
+1
+00:00:00.000 --> 00:00:03.860
+ My thought I have nobody by a beauty and will as you poured.
+
+2
+00:00:03.860 --> 00:00:09.840
+ Mr. Rochester is sub in that so-don't find simplest, and devoted about, to let might in
+
+3
+00:00:09.840 --> 00:00:09.940
+ a
+
+```
+
+You may call `#to_srt`, too.
+
 
 API
 ---
 
+### Transcription ###
+
+By default, `Whisper::Context#transcribe` works in a single thread. You can make it work in parallel by passing the `n_processors` option:
+
+```ruby
+whisper.transcribe("path/to/audio.wav", params, n_processors: Etc.nprocessors)
+```
+
+Note that transcription may occasionally be less accurate when run in parallel.
+
 ### Segments ###
 
 Once `Whisper::Context#transcribe` called, you can retrieve segments by `#each_segment`:
@@ -123,7 +222,7 @@ whisper
       ed: format_time(segment.end_time),
       text: segment.text
     }
-    line << " (speaker turned)" if segment.speaker_next_turn?
+    line << " (speaker turned)" if segment.speaker_turn_next?
 
     puts line
   end
@@ -139,7 +238,7 @@ params.on_new_segment do |segment|
     ed: format_time(segment.end_time),
     text: segment.text
   }
-  line << " (speaker turned)" if segment.speaker_next_turn?
+  line << " (speaker turned)" if segment.speaker_turn_next?
 
   puts line
 end
@@ -228,7 +327,7 @@ The second argument `samples` may be an array, an object with `length` and `each
 Development
 -----------
 
-    % git clone https://github.com/ggerganov/whisper.cpp.git
+    % git clone https://github.com/ggml-org/whisper.cpp.git
     % cd whisper.cpp/bindings/ruby
     % rake test
 
@@ -236,10 +335,15 @@ First call of `rake test` builds an extension and downloads a model for testing.
 
 If something seems wrong on build, running `rake clean` solves some cases.
 
+### Need help ###
+
+* Windows support
+* Refinement of C/C++ code, especially memory management
+
 License
 -------
 
 The same to [whisper.cpp][].
 
-[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
-[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
+[whisper.cpp]: https://github.com/ggml-org/whisper.cpp
+[models]: https://github.com/ggml-org/whisper.cpp/tree/master/models
diff --git a/bindings/ruby/Rakefile b/bindings/ruby/Rakefile
index 0d52e88a31a..d9a66030de4 100644
--- a/bindings/ruby/Rakefile
+++ b/bindings/ruby/Rakefile
@@ -3,11 +3,15 @@ require "bundler/gem_tasks"
 require "rake/testtask"
 require_relative "extsources"
 
+SOURCES_DIR = "ext/sources"
+
 SOURCES = FileList[]
 
 EXTSOURCES.each do |src|
   basename = src.pathmap("%f")
-  dest = basename == "LICENSE" ? basename : src.pathmap("%{../..,ext}p")
+  dest = basename == "LICENSE" ? basename
+           : src.pathmap("%{\\.\\./\\.\\.,#{SOURCES_DIR}}p")
+               .pathmap("%{\\.\\./javascript,#{SOURCES_DIR}/bindings/javascript}p")
   dir = dest.pathmap("%d")
   file src
   directory dir
@@ -18,7 +22,6 @@ end
 
 CLEAN.include SOURCES
-CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]
 
 SRC = FileList["ext/*.{c,cpp,h}"]
 
@@ -36,6 +39,20 @@ file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
     ruby "extconf.rb"
   end
 end
+if File.exist? "ext/Makefile"
+  task :make_clean do
+    cd "ext" do
+      sh "make", "clean"
+    end
+  end
+  task clean: :make_clean
+  task :make_distclean do
+    cd "ext" do
+      sh "make", "distclean"
+    end
+  end
+  task clobber: :make_distclean
+end
 
 file SO_FILE => "ext/Makefile" do |t|
   chdir "ext" do
@@ -50,17 +67,30 @@ file LIB_FILE => [SO_FILE, "lib"] do |t|
 end
 CLEAN.include LIB_FILE
 
-Rake::TestTask.new do |t|
-  t.test_files = FileList["tests/test_*.rb"]
+Rake::TestTask.new
+
+TEST_FIXTURE_AUDIO = "test/fixtures/jfk.wav"
+TEST_FIXTURE_AUDIO_SRC = File.expand_path(File.join(__dir__, "..", "..", "samples", "jfk.wav"))
+TEST_FIXTURE_AUDIO_DIR = TEST_FIXTURE_AUDIO.pathmap("%d")
+directory TEST_FIXTURE_AUDIO_DIR
+if File.exist? TEST_FIXTURE_AUDIO_SRC
+  file TEST_FIXTURE_AUDIO => [TEST_FIXTURE_AUDIO_SRC, TEST_FIXTURE_AUDIO_DIR] do |t|
+    symlink t.source, t.name
+  end
+else
+  require "open-uri"
+  file TEST_FIXTURE_AUDIO => TEST_FIXTURE_AUDIO_DIR do |t|
+    File.write t.name, URI("https://github.com/ggml-org/whisper.cpp/raw/refs/heads/master/samples/jfk.wav").read
+  end
+end
 
-TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
-file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
-  chdir "tests/jfk_reader" do
+TEST_MEMORY_VIEW = "test/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
+file TEST_MEMORY_VIEW => "test/jfk_reader/jfk_reader.c" do |t|
+  chdir "test/jfk_reader" do
     ruby "extconf.rb"
     sh "make"
   end
 end
-CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
+CLEAN.include TEST_MEMORY_VIEW
 
-task test: [LIB_FILE, TEST_MEMORY_VIEW]
+task test: [LIB_FILE, TEST_MEMORY_VIEW, TEST_FIXTURE_AUDIO]
diff --git a/bindings/ruby/ext/.gitignore b/bindings/ruby/ext/.gitignore
index 7703146ff8e..6fd36e40e28 100644
--- a/bindings/ruby/ext/.gitignore
+++ b/bindings/ruby/ext/.gitignore
@@ -2,10 +2,8 @@ Makefile
 whisper.so
 whisper.bundle
 whisper.dll
-scripts/get-flags.mk
 *.o
-/*/**/*.c
-/*/**/*.cpp
-/*/**/*.h
-/*/**/*.m
-/*/**/*.metal
+*.a
+sources/*
+!sources/CMakeGraphVizOptions.cmake
+mkmf.log
diff --git a/bindings/ruby/ext/cpu.mk b/bindings/ruby/ext/cpu.mk
deleted file mode 100644
index e617d69da56..00000000000
--- a/bindings/ruby/ext/cpu.mk
+++ /dev/null
@@ -1,11 +0,0 @@
-ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
-	ggml/src/ggml-cpu/ggml-cpu.cpp \
-	ggml/src/ggml-cpu/unary-ops.cpp \
-	ggml/src/ggml-cpu/binary-ops.cpp \
-	ggml/include/ggml-backend.h \
-	ggml/include/ggml.h \
-	ggml/include/ggml-alloc.h \
-	ggml/src/ggml-backend-impl.h \
-	ggml/include/ggml-cpu.h \
-	ggml/src/ggml-impl.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
diff --git a/bindings/ruby/ext/dependencies.rb b/bindings/ruby/ext/dependencies.rb
new file mode 100644
index 00000000000..2ba4b94b62b
--- /dev/null
+++ b/bindings/ruby/ext/dependencies.rb
@@ -0,0 +1,73 @@
+require "tsort"
+
+class Dependencies
+  include TSort
+
+  def initialize(cmake, options)
+    @cmake = cmake
+    @options = options
+    @static_lib_shape = nil
+    @nodes = {}
+    @graph = Hash.new {|h, k| h[k] = []}
+
+    generate_dot
+    parse_dot
+  end
+
+  def libs
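+    # Keep only the static-library nodes from the CMake dependency graph and
+    # return their archive names ("libwhisper.a" and friends) in link order,
+    # with dependent libraries listed before their dependencies.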
+    tsort.filter_map {|node|
+      label, shape = @nodes[node]
+      if shape == @static_lib_shape
+        label.gsub(/\\n\([^)]+\)/, '')
+      else
+        nil
+      end
+    }.reverse.collect {|lib| "lib#{lib}.a"}
+  end
+
+  def to_s
+    libs.join(" ")
+  end
+
+  private
+
+  def dot_path
+    File.join(__dir__, "build", "whisper.cpp.dot")
+  end
+
+  def generate_dot
+    args = ["-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF"]
+    args << @options.to_s unless @options.to_s.empty?
+    system @cmake, *args, exception: true
+  end
+
+  def parse_dot
+    File.open(dot_path).each_line do |line|
+      case line
+      when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
+        @static_lib_shape = $~[:shape]
+      when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?